diff --git a/src/bin/cratesfyi.rs b/src/bin/cratesfyi.rs index 99acd3bd1..94f75441b 100644 --- a/src/bin/cratesfyi.rs +++ b/src/bin/cratesfyi.rs @@ -1,5 +1,6 @@ use std::env; use std::fmt::Write; +use std::net::SocketAddr; use std::path::PathBuf; use std::str::FromStr; use std::sync::Arc; @@ -14,8 +15,8 @@ use docs_rs::utils::{ remove_crate_priority, set_crate_priority, ConfigName, }; use docs_rs::{ - start_web_server, BuildQueue, Config, Context, Index, Metrics, PackageKind, RustwideBuilder, - Storage, + start_background_metrics_webserver, start_web_server, BuildQueue, Config, Context, Index, + InstanceMetrics, PackageKind, RustwideBuilder, ServiceMetrics, Storage, }; use humantime::Duration; use once_cell::sync::OnceCell; @@ -107,10 +108,12 @@ enum CommandLine { /// Starts web server StartWebServer { #[arg(name = "SOCKET_ADDR", default_value = "0.0.0.0:3000")] - socket_addr: String, + socket_addr: SocketAddr, }, StartRegistryWatcher { + #[arg(name = "SOCKET_ADDR", default_value = "0.0.0.0:3000")] + metric_server_socket_addr: SocketAddr, /// Enable or disable the repository stats updater #[arg( long = "repository-stats-updater", @@ -122,7 +125,10 @@ enum CommandLine { cdn_invalidator: Toggle, }, - StartBuildServer, + StartBuildServer { + #[arg(name = "SOCKET_ADDR", default_value = "0.0.0.0:3000")] + metric_server_socket_addr: SocketAddr, + }, /// Starts the daemon Daemon { @@ -154,6 +160,7 @@ impl CommandLine { subcommand, } => subcommand.handle_args(ctx, skip_if_exists)?, Self::StartRegistryWatcher { + metric_server_socket_addr, repository_stats_updater, cdn_invalidator, } => { @@ -164,16 +171,22 @@ impl CommandLine { docs_rs::utils::daemon::start_background_cdn_invalidator(&ctx)?; } + start_background_metrics_webserver(Some(metric_server_socket_addr), &ctx)?; + docs_rs::utils::watch_registry(ctx.build_queue()?, ctx.config()?, ctx.index()?)?; } - Self::StartBuildServer => { + Self::StartBuildServer { + metric_server_socket_addr, + } => { + start_background_metrics_webserver(Some(metric_server_socket_addr), &ctx)?; + let build_queue = ctx.build_queue()?; let rustwide_builder = RustwideBuilder::init(&ctx)?; queue_builder(rustwide_builder, build_queue)?; } Self::StartWebServer { socket_addr } => { // Blocks indefinitely - start_web_server(Some(&socket_addr), &ctx)?; + start_web_server(Some(socket_addr), &ctx)?; } Self::Daemon { registry_watcher } => { docs_rs::utils::start_daemon(ctx, registry_watcher == Toggle::Enabled)?; @@ -699,7 +712,8 @@ struct BinContext { cdn: OnceCell>, config: OnceCell>, pool: OnceCell, - metrics: OnceCell>, + service_metrics: OnceCell>, + instance_metrics: OnceCell>, index: OnceCell>, repository_stats_updater: OnceCell>, runtime: OnceCell>, @@ -713,7 +727,8 @@ impl BinContext { cdn: OnceCell::new(), config: OnceCell::new(), pool: OnceCell::new(), - metrics: OnceCell::new(), + service_metrics: OnceCell::new(), + instance_metrics: OnceCell::new(), index: OnceCell::new(), repository_stats_updater: OnceCell::new(), runtime: OnceCell::new(), @@ -740,13 +755,13 @@ impl Context for BinContext { lazy! { fn build_queue(self) -> BuildQueue = BuildQueue::new( self.pool()?, - self.metrics()?, + self.instance_metrics()?, self.config()?, self.storage()?, ); fn storage(self) -> Storage = Storage::new( self.pool()?, - self.metrics()?, + self.instance_metrics()?, self.config()?, self.runtime()?, )?; @@ -755,7 +770,8 @@ impl Context for BinContext { &self.runtime()?, ); fn config(self) -> Config = Config::from_env()?; - fn metrics(self) -> Metrics = Metrics::new()?; + fn service_metrics(self) -> ServiceMetrics = ServiceMetrics::new()?; + fn instance_metrics(self) -> InstanceMetrics = InstanceMetrics::new()?; fn runtime(self) -> Runtime = { Builder::new_multi_thread() .enable_all() @@ -780,7 +796,9 @@ impl Context for BinContext { fn pool(&self) -> Result { Ok(self .pool - .get_or_try_init::<_, Error>(|| Ok(Pool::new(&*self.config()?, self.metrics()?)?))? + .get_or_try_init::<_, Error>(|| { + Ok(Pool::new(&*self.config()?, self.instance_metrics()?)?) + })? .clone()) } } diff --git a/src/build_queue.rs b/src/build_queue.rs index e828b6a25..82bc95c1a 100644 --- a/src/build_queue.rs +++ b/src/build_queue.rs @@ -4,7 +4,7 @@ use crate::docbuilder::PackageKind; use crate::error::Result; use crate::storage::Storage; use crate::utils::{get_config, get_crate_priority, report_error, retry, set_config, ConfigName}; -use crate::{Config, Index, Metrics, RustwideBuilder}; +use crate::{Config, Index, InstanceMetrics, RustwideBuilder}; use anyhow::Context; use fn_error_context::context; @@ -28,14 +28,14 @@ pub struct BuildQueue { config: Arc, storage: Arc, pub(crate) db: Pool, - metrics: Arc, + metrics: Arc, max_attempts: i32, } impl BuildQueue { pub fn new( db: Pool, - metrics: Arc, + metrics: Arc, config: Arc, storage: Arc, ) -> Self { @@ -600,7 +600,7 @@ mod tests { assert!(!called, "there were still items in the queue"); // Ensure metrics were recorded correctly - let metrics = env.metrics(); + let metrics = env.instance_metrics(); assert_eq!(metrics.total_builds.get(), 9); assert_eq!(metrics.failed_builds.get(), 1); diff --git a/src/cdn.rs b/src/cdn.rs index cdde4d35a..d0077d3e8 100644 --- a/src/cdn.rs +++ b/src/cdn.rs @@ -1,4 +1,4 @@ -use crate::{metrics::duration_to_seconds, utils::report_error, Config, Metrics}; +use crate::{metrics::duration_to_seconds, utils::report_error, Config, InstanceMetrics}; use anyhow::{anyhow, bail, Context, Error, Result}; use aws_sdk_cloudfront::{ config::{retry::RetryConfig, Region}, @@ -303,7 +303,7 @@ impl CdnBackend { #[instrument(skip(conn))] pub(crate) fn handle_queued_invalidation_requests( cdn: &CdnBackend, - metrics: &Metrics, + metrics: &InstanceMetrics, conn: &mut impl postgres::GenericClient, distribution_id: &str, ) -> Result<()> { @@ -707,13 +707,13 @@ mod tests { // now handle the queued invalidations handle_queued_invalidation_requests( &env.cdn(), - &env.metrics(), + &env.instance_metrics(), &mut *conn, "distribution_id_web", )?; handle_queued_invalidation_requests( &env.cdn(), - &env.metrics(), + &env.instance_metrics(), &mut *conn, "distribution_id_static", )?; @@ -741,13 +741,13 @@ mod tests { // now handle again handle_queued_invalidation_requests( &env.cdn(), - &env.metrics(), + &env.instance_metrics(), &mut *conn, "distribution_id_web", )?; handle_queued_invalidation_requests( &env.cdn(), - &env.metrics(), + &env.instance_metrics(), &mut *conn, "distribution_id_static", )?; @@ -806,7 +806,7 @@ mod tests { // handle the queued invalidations handle_queued_invalidation_requests( &env.cdn(), - &env.metrics(), + &env.instance_metrics(), &mut *conn, "distribution_id_web", )?; @@ -860,7 +860,7 @@ mod tests { // handle the queued invalidations handle_queued_invalidation_requests( &env.cdn(), - &env.metrics(), + &env.instance_metrics(), &mut *conn, "distribution_id_web", )?; @@ -886,7 +886,7 @@ mod tests { // now handle again handle_queued_invalidation_requests( &env.cdn(), - &env.metrics(), + &env.instance_metrics(), &mut *conn, "distribution_id_web", )?; @@ -921,7 +921,7 @@ mod tests { // run the handler handle_queued_invalidation_requests( &env.cdn(), - &env.metrics(), + &env.instance_metrics(), &mut *conn, "distribution_id_web", )?; diff --git a/src/context.rs b/src/context.rs index ff28681ae..482717189 100644 --- a/src/context.rs +++ b/src/context.rs @@ -2,7 +2,7 @@ use crate::cdn::CdnBackend; use crate::db::Pool; use crate::error::Result; use crate::repositories::RepositoryStatsUpdater; -use crate::{BuildQueue, Config, Index, Metrics, Storage}; +use crate::{BuildQueue, Config, Index, InstanceMetrics, ServiceMetrics, Storage}; use std::sync::Arc; use tokio::runtime::Runtime; @@ -12,7 +12,8 @@ pub trait Context { fn storage(&self) -> Result>; fn cdn(&self) -> Result>; fn pool(&self) -> Result; - fn metrics(&self) -> Result>; + fn service_metrics(&self) -> Result>; + fn instance_metrics(&self) -> Result>; fn index(&self) -> Result>; fn repository_stats_updater(&self) -> Result>; fn runtime(&self) -> Result>; diff --git a/src/db/pool.rs b/src/db/pool.rs index ca1e30abc..8660e7b85 100644 --- a/src/db/pool.rs +++ b/src/db/pool.rs @@ -1,4 +1,4 @@ -use crate::metrics::Metrics; +use crate::metrics::InstanceMetrics; use crate::Config; use postgres::{Client, NoTls}; use r2d2_postgres::PostgresConnectionManager; @@ -15,12 +15,12 @@ pub struct Pool { pool: Arc>>>>, #[cfg(not(test))] pool: r2d2::Pool>, - metrics: Arc, + metrics: Arc, max_size: u32, } impl Pool { - pub fn new(config: &Config, metrics: Arc) -> Result { + pub fn new(config: &Config, metrics: Arc) -> Result { debug!( "creating database pool (if this hangs, consider running `docker-compose up -d db s3`)" ); @@ -30,13 +30,17 @@ impl Pool { #[cfg(test)] pub(crate) fn new_with_schema( config: &Config, - metrics: Arc, + metrics: Arc, schema: &str, ) -> Result { Self::new_inner(config, metrics, schema) } - fn new_inner(config: &Config, metrics: Arc, schema: &str) -> Result { + fn new_inner( + config: &Config, + metrics: Arc, + schema: &str, + ) -> Result { let url = config .database_url .parse() diff --git a/src/docbuilder/rustwide_builder.rs b/src/docbuilder/rustwide_builder.rs index 8df0e3876..58fc27fce 100644 --- a/src/docbuilder/rustwide_builder.rs +++ b/src/docbuilder/rustwide_builder.rs @@ -13,7 +13,7 @@ use crate::utils::{ }; use crate::RUSTDOC_STATIC_STORAGE_PREFIX; use crate::{db::blacklist::is_blacklisted, utils::MetadataPackage}; -use crate::{Config, Context, Index, Metrics, Storage}; +use crate::{Config, Context, Index, InstanceMetrics, Storage}; use anyhow::{anyhow, bail, Context as _, Error}; use docsrs_metadata::{BuildTargets, Metadata, DEFAULT_TARGETS, HOST_TARGET}; use failure::Error as FailureError; @@ -45,7 +45,7 @@ pub struct RustwideBuilder { config: Arc, db: Pool, storage: Arc, - metrics: Arc, + metrics: Arc, index: Arc, rustc_version: String, repository_stats_updater: Arc, @@ -92,7 +92,7 @@ impl RustwideBuilder { config, db: context.pool()?, storage: context.storage()?, - metrics: context.metrics()?, + metrics: context.instance_metrics()?, index: context.index()?, rustc_version: String::new(), repository_stats_updater: context.repository_stats_updater()?, diff --git a/src/lib.rs b/src/lib.rs index 751204914..d53fd32b3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,9 +8,9 @@ pub use self::context::Context; pub use self::docbuilder::PackageKind; pub use self::docbuilder::RustwideBuilder; pub use self::index::Index; -pub use self::metrics::Metrics; +pub use self::metrics::{InstanceMetrics, ServiceMetrics}; pub use self::storage::Storage; -pub use self::web::start_web_server; +pub use self::web::{start_background_metrics_webserver, start_web_server}; mod build_queue; pub mod cdn; diff --git a/src/metrics/macros.rs b/src/metrics/macros.rs index 968f2c325..e4b7ec6f7 100644 --- a/src/metrics/macros.rs +++ b/src/metrics/macros.rs @@ -2,7 +2,6 @@ pub(super) trait MetricFromOpts: Sized { fn from_opts(opts: prometheus::Opts) -> Result; } -#[macro_export] macro_rules! metrics { ( $vis:vis struct $name:ident { @@ -82,7 +81,6 @@ macro_rules! metrics { }; } -#[macro_export] macro_rules! load_metric_type { ($name:ident as single) => { use prometheus::$name; diff --git a/src/metrics/mod.rs b/src/metrics/mod.rs index 9d2bcc937..2358899c2 100644 --- a/src/metrics/mod.rs +++ b/src/metrics/mod.rs @@ -36,21 +36,7 @@ pub const CDN_INVALIDATION_HISTOGRAM_BUCKETS: &[f64; 11] = &[ ]; metrics! { - pub struct Metrics { - /// Number of crates in the build queue - queued_crates_count: IntGauge, - /// Number of crates in the build queue that have a positive priority - prioritized_crates_count: IntGauge, - /// Number of crates that failed to build - failed_crates_count: IntGauge, - /// Whether the build queue is locked - queue_is_locked: IntGauge, - /// queued crates by priority - queued_crates_count_by_priority: IntGaugeVec["priority"], - - /// queued CDN invalidations - queued_cdn_invalidations_by_distribution: IntGaugeVec["distribution"], - + pub struct InstanceMetrics { /// The number of idle database connections idle_db_connections: IntGauge, /// The number of used database connections @@ -143,7 +129,7 @@ impl RecentlyAccessedReleases { .insert((version, TargetAtom::from(target)), now); } - pub(crate) fn gather(&self, metrics: &Metrics) { + pub(crate) fn gather(&self, metrics: &InstanceMetrics) { fn inner(map: &DashMap, metric: &IntGaugeVec) { let mut hour_count = 0; let mut half_hour_count = 0; @@ -182,18 +168,112 @@ impl RecentlyAccessedReleases { } } -impl Metrics { +impl InstanceMetrics { + pub(crate) fn gather(&self, pool: &Pool) -> Result, Error> { + self.idle_db_connections.set(pool.idle_connections() as i64); + self.used_db_connections.set(pool.used_connections() as i64); + self.max_db_connections.set(pool.max_size() as i64); + + self.recently_accessed_releases.gather(self); + self.gather_system_performance(); + Ok(self.registry.gather()) + } + + #[cfg(not(target_os = "linux"))] + fn gather_system_performance(&self) {} + + #[cfg(target_os = "linux")] + fn gather_system_performance(&self) { + use procfs::process::Process; + + let process = Process::myself().unwrap(); + self.open_file_descriptors + .set(process.fd_count().unwrap() as i64); + self.running_threads + .set(process.stat().unwrap().num_threads); + } +} + +fn metric_from_opts( + registry: &prometheus::Registry, + metric: &str, + help: &str, + variable_label: Option<&str>, +) -> Result { + let mut opts = prometheus::Opts::new(metric, help).namespace("docsrs"); + + if let Some(label) = variable_label { + opts = opts.variable_label(label); + } + + let metric = T::from_opts(opts)?; + registry.register(Box::new(metric.clone()))?; + Ok(metric) +} + +#[derive(Debug)] +pub struct ServiceMetrics { + pub queued_crates_count: IntGauge, + pub prioritized_crates_count: IntGauge, + pub failed_crates_count: IntGauge, + pub queue_is_locked: IntGauge, + pub queued_crates_count_by_priority: IntGaugeVec, + pub queued_cdn_invalidations_by_distribution: IntGaugeVec, + + registry: prometheus::Registry, +} + +impl ServiceMetrics { + pub fn new() -> Result { + let registry = prometheus::Registry::new(); + Ok(Self { + registry: registry.clone(), + queued_crates_count: metric_from_opts( + ®istry, + "queued_crates_count", + "Number of crates in the build queue", + None, + )?, + prioritized_crates_count: metric_from_opts( + ®istry, + "prioritized_crates_count", + "Number of crates in the build queue that have a positive priority", + None, + )?, + failed_crates_count: metric_from_opts( + ®istry, + "failed_crates_count", + "Number of crates that failed to build", + None, + )?, + queue_is_locked: metric_from_opts( + ®istry, + "queue_is_locked", + "Whether the build queue is locked", + None, + )?, + queued_crates_count_by_priority: metric_from_opts( + ®istry, + "queued_crates_count_by_priority", + "queued crates by priority", + Some("priority"), + )?, + queued_cdn_invalidations_by_distribution: metric_from_opts( + ®istry, + "queued_cdn_invalidations_by_distribution", + "queued CDN invalidations", + Some("distribution"), + )?, + }) + } + pub(crate) fn gather( &self, pool: &Pool, queue: &BuildQueue, config: &Config, ) -> Result, Error> { - self.idle_db_connections.set(pool.idle_connections() as i64); - self.used_db_connections.set(pool.used_connections() as i64); - self.max_db_connections.set(pool.max_size() as i64); self.queue_is_locked.set(queue.is_locked()? as i64); - self.queued_crates_count.set(queue.pending_count()? as i64); self.prioritized_crates_count .set(queue.prioritized_count()? as i64); @@ -216,23 +296,6 @@ impl Metrics { } self.failed_crates_count.set(queue.failed_count()? as i64); - - self.recently_accessed_releases.gather(self); - self.gather_system_performance(); Ok(self.registry.gather()) } - - #[cfg(not(target_os = "linux"))] - fn gather_system_performance(&self) {} - - #[cfg(target_os = "linux")] - fn gather_system_performance(&self) { - use procfs::process::Process; - - let process = Process::myself().unwrap(); - self.open_file_descriptors - .set(process.fd_count().unwrap() as i64); - self.running_threads - .set(process.stat().unwrap().num_threads); - } } diff --git a/src/storage/database.rs b/src/storage/database.rs index f937e3b6e..bfdc0eec4 100644 --- a/src/storage/database.rs +++ b/src/storage/database.rs @@ -1,17 +1,17 @@ use super::{Blob, FileRange, StorageTransaction}; use crate::db::Pool; use crate::error::Result; -use crate::Metrics; +use crate::InstanceMetrics; use postgres::Transaction; use std::{convert::TryFrom, sync::Arc}; pub(crate) struct DatabaseBackend { pool: Pool, - metrics: Arc, + metrics: Arc, } impl DatabaseBackend { - pub(crate) fn new(pool: Pool, metrics: Arc) -> Self { + pub(crate) fn new(pool: Pool, metrics: Arc) -> Self { Self { pool, metrics } } @@ -132,7 +132,7 @@ impl DatabaseBackend { pub(super) struct DatabaseClient { conn: crate::db::PoolClient, - metrics: Arc, + metrics: Arc, } impl DatabaseClient { @@ -146,7 +146,7 @@ impl DatabaseClient { pub(super) struct DatabaseStorageTransaction<'a> { transaction: Transaction<'a>, - metrics: &'a Metrics, + metrics: &'a InstanceMetrics, } impl<'a> StorageTransaction for DatabaseStorageTransaction<'a> { diff --git a/src/storage/mod.rs b/src/storage/mod.rs index a65c51d58..c99989ac7 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -10,7 +10,7 @@ use self::s3::S3Backend; use self::sqlite_pool::SqliteConnectionPool; use crate::error::Result; use crate::web::metrics::RenderingTimesRecorder; -use crate::{db::Pool, Config, Metrics}; +use crate::{db::Pool, Config, InstanceMetrics}; use anyhow::{anyhow, ensure}; use chrono::{DateTime, Utc}; use path_slash::PathExt; @@ -122,7 +122,7 @@ pub struct Storage { impl Storage { pub fn new( pool: Pool, - metrics: Arc, + metrics: Arc, config: Arc, runtime: Arc, ) -> Result { @@ -782,7 +782,7 @@ mod backend_tests { Ok(()) } - fn test_store_blobs(storage: &Storage, metrics: &Metrics) -> Result<()> { + fn test_store_blobs(storage: &Storage, metrics: &InstanceMetrics) -> Result<()> { const NAMES: &[&str] = &[ "a", "b", @@ -822,7 +822,7 @@ mod backend_tests { Ok(()) } - fn test_store_all_in_archive(storage: &Storage, metrics: &Metrics) -> Result<()> { + fn test_store_all_in_archive(storage: &Storage, metrics: &InstanceMetrics) -> Result<()> { let dir = tempfile::Builder::new() .prefix("docs.rs-upload-archive-test") .tempdir()?; @@ -891,7 +891,7 @@ mod backend_tests { Ok(()) } - fn test_store_all(storage: &Storage, metrics: &Metrics) -> Result<()> { + fn test_store_all(storage: &Storage, metrics: &InstanceMetrics) -> Result<()> { let dir = tempfile::Builder::new() .prefix("docs.rs-upload-test") .tempdir()?; @@ -1072,7 +1072,7 @@ mod backend_tests { #[test] fn $test() { crate::test::wrapper(|env| { - super::$test(&*get_storage(env), &*env.metrics()) + super::$test(&*get_storage(env), &*env.instance_metrics()) }); } )* diff --git a/src/storage/s3.rs b/src/storage/s3.rs index 7c697f115..abb3ba77e 100644 --- a/src/storage/s3.rs +++ b/src/storage/s3.rs @@ -1,5 +1,5 @@ use super::{Blob, FileRange, StorageTransaction}; -use crate::{Config, Metrics}; +use crate::{Config, InstanceMetrics}; use anyhow::Error; use aws_sdk_s3::{ config::{retry::RetryConfig, Region}, @@ -25,14 +25,14 @@ pub(super) struct S3Backend { client: Client, runtime: Arc, bucket: String, - metrics: Arc, + metrics: Arc, #[cfg(test)] temporary: bool, } impl S3Backend { pub(super) fn new( - metrics: Arc, + metrics: Arc, config: &Config, runtime: Arc, ) -> Result { diff --git a/src/test/mod.rs b/src/test/mod.rs index c5cfc2383..1d9f59bd6 100644 --- a/src/test/mod.rs +++ b/src/test/mod.rs @@ -7,7 +7,7 @@ use crate::error::Result; use crate::repositories::RepositoryStatsUpdater; use crate::storage::{Storage, StorageKind}; use crate::web::{build_axum_app, cache, page::TemplateData}; -use crate::{BuildQueue, Config, Context, Index, Metrics}; +use crate::{BuildQueue, Config, Context, Index, InstanceMetrics, ServiceMetrics}; use anyhow::Context as _; use fn_error_context::context; use once_cell::unsync::OnceCell; @@ -216,7 +216,8 @@ pub(crate) struct TestEnvironment { cdn: OnceCell>, index: OnceCell>, runtime: OnceCell>, - metrics: OnceCell>, + instance_metrics: OnceCell>, + service_metrics: OnceCell>, frontend: OnceCell, repository_stats_updater: OnceCell>, } @@ -247,7 +248,8 @@ impl TestEnvironment { storage: OnceCell::new(), cdn: OnceCell::new(), index: OnceCell::new(), - metrics: OnceCell::new(), + instance_metrics: OnceCell::new(), + service_metrics: OnceCell::new(), frontend: OnceCell::new(), runtime: OnceCell::new(), repository_stats_updater: OnceCell::new(), @@ -314,7 +316,7 @@ impl TestEnvironment { .get_or_init(|| { Arc::new(BuildQueue::new( self.db().pool(), - self.metrics(), + self.instance_metrics(), self.config(), self.storage(), )) @@ -340,7 +342,7 @@ impl TestEnvironment { Arc::new( Storage::new( self.db().pool(), - self.metrics(), + self.instance_metrics(), self.config(), self.runtime(), ) @@ -350,11 +352,22 @@ impl TestEnvironment { .clone() } - pub(crate) fn metrics(&self) -> Arc { - self.metrics - .get_or_init(|| Arc::new(Metrics::new().expect("failed to initialize the metrics"))) + pub(crate) fn instance_metrics(&self) -> Arc { + self.instance_metrics + .get_or_init(|| { + Arc::new(InstanceMetrics::new().expect("failed to initialize the instance metrics")) + }) + .clone() + } + + pub(crate) fn service_metrics(&self) -> Arc { + self.service_metrics + .get_or_init(|| { + Arc::new(ServiceMetrics::new().expect("failed to initialize the service metrics")) + }) .clone() } + pub(crate) fn runtime(&self) -> Arc { self.runtime .get_or_init(|| { @@ -395,7 +408,8 @@ impl TestEnvironment { pub(crate) fn db(&self) -> &TestDatabase { self.db.get_or_init(|| { - TestDatabase::new(&self.config(), self.metrics()).expect("failed to initialize the db") + TestDatabase::new(&self.config(), self.instance_metrics()) + .expect("failed to initialize the db") }) } @@ -438,8 +452,12 @@ impl Context for TestEnvironment { Ok(self.db().pool()) } - fn metrics(&self) -> Result> { - Ok(self.metrics()) + fn instance_metrics(&self) -> Result> { + Ok(self.instance_metrics()) + } + + fn service_metrics(&self) -> Result> { + Ok(self.service_metrics()) } fn index(&self) -> Result> { @@ -461,7 +479,7 @@ pub(crate) struct TestDatabase { } impl TestDatabase { - fn new(config: &Config, metrics: Arc) -> Result { + fn new(config: &Config, metrics: Arc) -> Result { // A random schema name is generated and used for the current connection. This allows each // test to create a fresh instance of the database to run within. let schema = format!("docs_rs_test_schema_{}", rand::random::()); diff --git a/src/utils/daemon.rs b/src/utils/daemon.rs index 07dadbd45..c13f55d78 100644 --- a/src/utils/daemon.rs +++ b/src/utils/daemon.rs @@ -82,7 +82,7 @@ pub fn start_background_repository_stats_updater(context: &dyn Context) -> Resul pub fn start_background_cdn_invalidator(context: &dyn Context) -> Result<(), Error> { let cdn = context.cdn()?; - let metrics = context.metrics()?; + let metrics = context.instance_metrics()?; let config = context.config()?; let pool = context.pool()?; diff --git a/src/web/metrics.rs b/src/web/metrics.rs index 6f71edb7b..4a69af35e 100644 --- a/src/web/metrics.rs +++ b/src/web/metrics.rs @@ -1,43 +1,70 @@ use crate::{ db::Pool, metrics::duration_to_seconds, utils::spawn_blocking, web::error::AxumResult, - BuildQueue, Config, Metrics, + BuildQueue, Config, InstanceMetrics, ServiceMetrics, }; -use anyhow::Context as _; +use anyhow::{Context as _, Result}; use axum::{ - body::Body, extract::{Extension, MatchedPath}, http::Request as AxumRequest, - http::{ - header::{CONTENT_LENGTH, CONTENT_TYPE}, - Response as AxumHttpResponse, StatusCode, - }, + http::{header::CONTENT_TYPE, StatusCode}, middleware::Next, response::IntoResponse, }; -use prometheus::{Encoder, HistogramVec, TextEncoder}; +use prometheus::{proto::MetricFamily, Encoder, HistogramVec, TextEncoder}; use std::{borrow::Cow, sync::Arc, time::Instant}; #[cfg(test)] use tracing::debug; +async fn fetch_and_render_metrics( + fetch_metrics: impl Fn() -> Result> + Send + 'static, +) -> AxumResult { + let buffer = spawn_blocking(move || { + let metrics_families = fetch_metrics()?; + let mut buffer = Vec::new(); + TextEncoder::new() + .encode(&metrics_families, &mut buffer) + .context("error encoding metrics")?; + Ok(buffer) + }) + .await?; + + Ok(( + StatusCode::OK, + [(CONTENT_TYPE, mime::TEXT_PLAIN.as_ref())], + buffer, + )) +} + pub(super) async fn metrics_handler( Extension(pool): Extension, Extension(config): Extension>, - Extension(metrics): Extension>, + Extension(instance_metrics): Extension>, + Extension(service_metrics): Extension>, Extension(queue): Extension>, ) -> AxumResult { - let families = spawn_blocking(move || metrics.gather(&pool, &queue, &config)).await?; - - let mut buffer = Vec::new(); - TextEncoder::new() - .encode(&families, &mut buffer) - .context("error encoding metrics")?; - - Ok(AxumHttpResponse::builder() - .status(StatusCode::OK) - .header(CONTENT_TYPE, mime::TEXT_PLAIN.as_ref()) - .header(CONTENT_LENGTH, buffer.len()) - .body(Body::from(buffer)) - .context("error generating response")?) + fetch_and_render_metrics(move || { + let mut families = Vec::new(); + families.extend_from_slice(&instance_metrics.gather(&pool)?); + families.extend_from_slice(&service_metrics.gather(&pool, &queue, &config)?); + Ok(families) + }) + .await +} + +pub(super) async fn service_metrics_handler( + Extension(pool): Extension, + Extension(config): Extension>, + Extension(metrics): Extension>, + Extension(queue): Extension>, +) -> AxumResult { + fetch_and_render_metrics(move || metrics.gather(&pool, &queue, &config)).await +} + +pub(super) async fn instance_metrics_handler( + Extension(pool): Extension, + Extension(metrics): Extension>, +) -> AxumResult { + fetch_and_render_metrics(move || metrics.gather(&pool)).await } /// Request recorder middleware @@ -66,7 +93,7 @@ pub(crate) async fn request_recorder( let metrics = request .extensions() - .get::>() + .get::>() .expect("metrics missing in request extensions") .clone(); @@ -135,7 +162,7 @@ impl Drop for RenderingTimesRecorder<'_> { #[cfg(test)] mod tests { - use crate::test::{assert_success, wrapper}; + use crate::test::wrapper; use crate::Context; use std::collections::HashMap; @@ -186,7 +213,7 @@ mod tests { .create()?; let frontend = env.frontend(); - let metrics = env.metrics(); + let metrics = env.instance_metrics(); for (route, _) in ROUTES.iter() { frontend.get(route).send()?; @@ -200,8 +227,7 @@ mod tests { } // this shows what the routes were *actually* recorded as, making it easier to update ROUTES if the name changes. - let metrics_serialized = - metrics.gather(&env.pool()?, &env.build_queue(), &env.config())?; + let metrics_serialized = metrics.gather(&env.pool()?)?; let all_routes_visited = metrics_serialized .iter() .find(|x| x.get_name() == "docsrs_routes_visited") @@ -242,8 +268,39 @@ mod tests { #[test] fn test_metrics_page_success() { wrapper(|env| { - let web = env.frontend(); - assert_success("/about/metrics", web) + let response = env.frontend().get("/about/metrics").send()?; + assert!(response.status().is_success()); + + let body = response.text()?; + assert!(body.contains("docsrs_failed_builds"), "{}", body); + assert!(body.contains("queued_crates_count"), "{}", body); + Ok(()) + }) + } + + #[test] + fn test_service_metrics_page_success() { + wrapper(|env| { + let response = env.frontend().get("/about/metrics/service").send()?; + assert!(response.status().is_success()); + + let body = response.text()?; + assert!(!body.contains("docsrs_failed_builds"), "{}", body); + assert!(body.contains("queued_crates_count"), "{}", body); + Ok(()) + }) + } + + #[test] + fn test_instance_metrics_page_success() { + wrapper(|env| { + let response = env.frontend().get("/about/metrics/instance").send()?; + assert!(response.status().is_success()); + + let body = response.text()?; + assert!(body.contains("docsrs_failed_builds"), "{}", body); + assert!(!body.contains("queued_crates_count"), "{}", body); + Ok(()) }) } } diff --git a/src/web/mod.rs b/src/web/mod.rs index 2e848c041..a4db97c33 100644 --- a/src/web/mod.rs +++ b/src/web/mod.rs @@ -3,7 +3,7 @@ pub mod page; use crate::utils::get_correct_docsrs_style_file; -use crate::utils::spawn_blocking; +use crate::utils::{report_error, spawn_blocking}; use anyhow::{anyhow, bail, Context as _, Result}; use axum_extra::middleware::option_layer; use serde_json::Value; @@ -46,8 +46,12 @@ use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS}; use postgres::Client; use semver::{Version, VersionReq}; use serde::Serialize; -use std::borrow::Borrow; -use std::{borrow::Cow, net::SocketAddr, sync::Arc}; +use std::net::{IpAddr, Ipv4Addr}; +use std::{ + borrow::{Borrow, Cow}, + net::SocketAddr, + sync::Arc, +}; use tower::ServiceBuilder; use tower_http::{catch_panic::CatchPanicLayer, timeout::TimeoutLayer, trace::TraceLayer}; use url::form_urlencoded; @@ -61,7 +65,7 @@ pub(crate) fn encode_url_path(path: &str) -> String { utf8_percent_encode(path, PATH).to_string() } -const DEFAULT_BIND: &str = "0.0.0.0:3000"; +const DEFAULT_BIND: SocketAddr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), 3000); #[derive(Debug)] struct MatchVersion { @@ -262,15 +266,14 @@ async fn log_timeouts_to_sentry(req: AxumRequest, next: Next) -> AxumRe response } -#[instrument(skip_all)] -pub(crate) fn build_axum_app( +fn apply_middleware( + router: AxumRouter, context: &dyn Context, - template_data: Arc, -) -> Result { + template_data: Option>, +) -> Result { let config = context.config()?; - Ok(routes::build_axum_routes().layer( - // It’s recommended to use tower::ServiceBuilder to apply multiple middleware at once, - // instead of calling Router::layer repeatedly: + let has_templates = template_data.is_some(); + Ok(router.layer( ServiceBuilder::new() .layer(TraceLayer::new_for_http()) .layer(sentry_tower::NewSentryLayer::new_from_top()) @@ -284,27 +287,67 @@ pub(crate) fn build_axum_app( .layer(option_layer(config.request_timeout.map(TimeoutLayer::new))) .layer(Extension(context.pool()?)) .layer(Extension(context.build_queue()?)) - .layer(Extension(context.metrics()?)) + .layer(Extension(context.service_metrics()?)) + .layer(Extension(context.instance_metrics()?)) .layer(Extension(context.config()?)) .layer(Extension(context.storage()?)) .layer(Extension(context.repository_stats_updater()?)) - .layer(Extension(template_data)) + .layer(option_layer(template_data.map(Extension))) .layer(middleware::from_fn(csp::csp_middleware)) - .layer(middleware::from_fn( + .layer(option_layer(has_templates.then_some(middleware::from_fn( page::web_page::render_templates_middleware, - )) + )))) .layer(middleware::from_fn(cache::cache_middleware)), )) } +pub(crate) fn build_axum_app( + context: &dyn Context, + template_data: Arc, +) -> Result { + apply_middleware(routes::build_axum_routes(), context, Some(template_data)) +} + +pub(crate) fn build_metrics_axum_app(context: &dyn Context) -> Result { + apply_middleware(routes::build_metric_routes(), context, None) +} + +pub fn start_background_metrics_webserver( + addr: Option, + context: &dyn Context, +) -> Result<(), Error> { + let axum_addr: SocketAddr = addr.unwrap_or(DEFAULT_BIND); + + tracing::info!( + "Starting metrics web server on `{}:{}`", + axum_addr.ip(), + axum_addr.port() + ); + + let metrics_axum_app = build_metrics_axum_app(context)?.into_make_service(); + let runtime = context.runtime()?; + + runtime.spawn(async move { + if let Err(err) = axum::Server::bind(&axum_addr) + .serve(metrics_axum_app) + .await + .context("error running metrics web server") + { + report_error(&err); + } + }); + + Ok(()) +} + #[instrument(skip_all)] -pub fn start_web_server(addr: Option<&str>, context: &dyn Context) -> Result<(), Error> { +pub fn start_web_server(addr: Option, context: &dyn Context) -> Result<(), Error> { let template_data = Arc::new(TemplateData::new( &mut *context.pool()?.get()?, context.config()?.render_threads, )?); - let axum_addr: SocketAddr = addr.unwrap_or(DEFAULT_BIND).parse()?; + let axum_addr = addr.unwrap_or(DEFAULT_BIND); tracing::info!( "Starting web server on `{}:{}`", diff --git a/src/web/releases.rs b/src/web/releases.rs index f97f90556..f6a2b76f7 100644 --- a/src/web/releases.rs +++ b/src/web/releases.rs @@ -11,7 +11,7 @@ use crate::{ error::{AxumNope, AxumResult}, match_version_axum, }, - BuildQueue, Config, Metrics, + BuildQueue, Config, InstanceMetrics, }; use anyhow::{anyhow, Context as _, Result}; use axum::{ @@ -458,7 +458,7 @@ impl Default for Search { async fn redirect_to_random_crate( config: Arc, - metrics: Arc, + metrics: Arc, pool: Pool, ) -> AxumResult { // We try to find a random crate and redirect to it. @@ -520,7 +520,7 @@ impl_axum_webpage! { pub(crate) async fn search_handler( Extension(pool): Extension, Extension(config): Extension>, - Extension(metrics): Extension>, + Extension(metrics): Extension>, Query(mut params): Query>, ) -> AxumResult { let query = params diff --git a/src/web/routes.rs b/src/web/routes.rs index 6b4837a0b..f7c5cc2a1 100644 --- a/src/web/routes.rs +++ b/src/web/routes.rs @@ -77,6 +77,22 @@ async fn block_blacklisted_prefixes_middleware( next.run(request).await } +pub(super) fn build_metric_routes() -> AxumRouter { + AxumRouter::new() + .route_with_tsr( + "/about/metrics/instance", + get_internal(super::metrics::instance_metrics_handler), + ) + .route_with_tsr( + "/about/metrics/service", + get_internal(super::metrics::service_metrics_handler), + ) + .route_with_tsr( + "/about/metrics", + get_internal(super::metrics::metrics_handler), + ) +} + pub(super) fn build_axum_routes() -> AxumRouter { // hint for naming axum routes: // when routes overlap, the route parameters at the same position @@ -121,10 +137,7 @@ pub(super) fn build_axum_routes() -> AxumRouter { "/about/builds", get_internal(super::sitemap::about_builds_handler), ) - .route_with_tsr( - "/about/metrics", - get_internal(super::metrics::metrics_handler), - ) + .merge(build_metric_routes()) .route_with_tsr("/about", get_internal(super::sitemap::about_handler)) .route_with_tsr( "/about/:subpage", diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index d833188fa..87e0ce1ae 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -18,7 +18,7 @@ use crate::{ page::TemplateData, MatchSemver, MetaData, }, - Config, Metrics, Storage, RUSTDOC_STATIC_STORAGE_PREFIX, + Config, InstanceMetrics, Storage, RUSTDOC_STATIC_STORAGE_PREFIX, }; use anyhow::{anyhow, Context as _}; use axum::{ @@ -91,7 +91,7 @@ async fn try_serve_legacy_toolchain_asset( #[instrument(skip_all)] pub(crate) async fn rustdoc_redirector_handler( Path(params): Path, - Extension(metrics): Extension>, + Extension(metrics): Extension>, Extension(storage): Extension>, Extension(config): Extension>, Extension(pool): Extension, @@ -309,7 +309,7 @@ impl RustdocPage { rustdoc_html: &[u8], max_parse_memory: usize, templates: &TemplateData, - metrics: &Metrics, + metrics: &InstanceMetrics, config: &Config, file_path: &str, ) -> AxumResult { @@ -369,7 +369,7 @@ pub(crate) struct RustdocHtmlParams { #[instrument(skip_all)] pub(crate) async fn rustdoc_html_server_handler( Path(params): Path, - Extension(metrics): Extension>, + Extension(metrics): Extension>, Extension(templates): Extension>, Extension(pool): Extension, Extension(storage): Extension>,