diff --git a/docs/gathered-data.md b/docs/gathered-data.md index 05e4bc8da..8eefbbb21 100644 --- a/docs/gathered-data.md +++ b/docs/gathered-data.md @@ -1220,7 +1220,6 @@ Gathered metrics: - `console_helm_uninstalls_total` - `etcd_server_slow_apply_total` - `etcd_server_slow_read_indexes_total` - - followed by at most 1000 lines of `ALERTS` metric ### API Reference None @@ -1255,6 +1254,7 @@ None - `etcd_server_slow_apply_total` introduced in version 4.16+ - `etcd_server_slow_read_indexes_total` introduced in version 4.16+ - `haproxy_exporter_server_threshold` introduced in version 4.17+ +- `ALERTS` removed in version 4.17+ ## MutatingWebhookConfigurations diff --git a/docs/insights-archive-sample/config/metrics b/docs/insights-archive-sample/config/metrics index d6d686c84..6661f8b12 100644 --- a/docs/insights-archive-sample/config/metrics +++ b/docs/insights-archive-sample/config/metrics @@ -134,20 +134,4 @@ virt_platform{container="kube-rbac-proxy",endpoint="https",instance="ci-ln-k19wb virt_platform{container="kube-rbac-proxy",endpoint="https",instance="ci-ln-k19wbxk-f76d1-6qdmf-worker-b-94cjz",job="node-exporter",namespace="openshift-monitoring",pod="node-exporter-dmnrm",service="node-exporter",type="gcp",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-0"} 1 1620977245743 virt_platform{container="kube-rbac-proxy",endpoint="https",instance="ci-ln-k19wbxk-f76d1-6qdmf-worker-b-94cjz",job="node-exporter",namespace="openshift-monitoring",pod="node-exporter-dmnrm",service="node-exporter",type="kvm",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-0"} 1 1620977245743 virt_platform{container="kube-rbac-proxy",endpoint="https",instance="ci-ln-k19wbxk-f76d1-6qdmf-master-0",job="node-exporter",namespace="openshift-monitoring",pod="node-exporter-2dsf2",service="node-exporter",type="gcp",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-0"} 1 1620977246807 -virt_platform{container="kube-rbac-proxy",endpoint="https",instance="ci-ln-k19wbxk-f76d1-6qdmf-worker-c-44ttf",job="node-exporter",namespace="openshift-monitoring",pod="node-exporter-56fn6",service="node-exporter",type="gcp",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-0"} 1 1620977240435 -# ALERTS 16/1000 -# TYPE ALERTS untyped -ALERTS{alertname="Watchdog",alertstate="firing",severity="none",instance="",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-1"} 1 1612793310163 -ALERTS{alertname="KubeMemoryOvercommit",alertstate="firing",severity="warning",instance="",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-1"} 1 1612793311534 -ALERTS{alertname="AlertmanagerReceiversNotConfigured",alertstate="firing",severity="warning",instance="",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-1"} 1 1612793316662 -ALERTS{alertname="KubePodNotReady",alertstate="firing",namespace="openshift-etcd",pod="etcd-quorum-guard-587fd6c776-xg5zw",severity="warning",instance="",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-1"} 1 1612793320590 -ALERTS{alertname="KubePodNotReady",alertstate="firing",namespace="openshift-etcd",pod="etcd-quorum-guard-587fd6c776-czv8b",severity="warning",instance="",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-1"} 1 1612793320590 -ALERTS{alertname="KubePodNotReady",alertstate="firing",namespace="openshift-ingress",pod="router-default-6f59db78db-gh48w",severity="warning",instance="",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-1"} 1 1612793320590 -ALERTS{alertname="KubeDeploymentReplicasMismatch",alertstate="firing",container="kube-rbac-proxy-main",deployment="router-default",endpoint="https-main",instance="10.129.0.12:8443",job="kube-state-metrics",namespace="openshift-ingress",pod="kube-state-metrics-664f855c7f-9vbzh",service="kube-state-metrics",severity="warning",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-1"} 1 1612793320590 -ALERTS{alertname="KubeDeploymentReplicasMismatch",alertstate="firing",container="kube-rbac-proxy-main",deployment="etcd-quorum-guard",endpoint="https-main",instance="10.129.0.12:8443",job="kube-state-metrics",namespace="openshift-etcd",pod="kube-state-metrics-664f855c7f-9vbzh",service="kube-state-metrics",severity="warning",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-1"} 1 1612793320590 -ALERTS{alertname="etcdHighCommitDurations",alertstate="firing",endpoint="etcd-metrics",instance="10.10.94.191:9979",job="etcd",namespace="openshift-etcd",pod="etcd-master-0.tremes.lab.rdu2.cee.redhat.com",service="etcd",severity="warning",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-1"} 1 1612793295353 -ALERTS{alertname="ClusterOperatorDown",alertstate="firing",endpoint="metrics",instance="10.10.94.191:9099",job="cluster-version-operator",name="ingress",namespace="openshift-cluster-version",pod="cluster-version-operator-644d79c75d-xl7z5",service="cluster-version-operator",severity="critical",version="4.6.15",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-1"} 1 1612793309213 -ALERTS{alertname="ClusterOperatorDegraded",alertstate="firing",condition="Degraded",endpoint="metrics",instance="10.10.94.191:9099",job="cluster-version-operator",name="ingress",namespace="openshift-cluster-version",pod="cluster-version-operator-644d79c75d-xl7z5",reason="IngressControllersDegraded",service="cluster-version-operator",severity="critical",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-1"} 1 1612793309213 -ALERTS{alertname="KubeAPIErrorBudgetBurn",alertstate="pending",long="3d",severity="warning",short="6h",instance="",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-1"} 1 1612793298512 -ALERTS{alertname="ClusterNotUpgradeable",alertstate="firing",condition="Upgradeable",endpoint="metrics",name="version",severity="warning",instance="",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-1"} 1 1612793309213 -ALERTS{alertname="KubePodNotReady",alertstate="pending",namespace="openshift-insights",pod="insights-operator-f7df674b4-x9qtw",severity="warning",instance="",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-1"} +virt_platform{container="kube-rbac-proxy",endpoint="https",instance="ci-ln-k19wbxk-f76d1-6qdmf-worker-c-44ttf",job="node-exporter",namespace="openshift-monitoring",pod="node-exporter-56fn6",service="node-exporter",type="gcp",prometheus="openshift-monitoring/k8s",prometheus_replica="prometheus-k8s-0"} 1 1620977240435 \ No newline at end of file diff --git a/pkg/gatherers/clusterconfig/gather_most_recent_metrics.go b/pkg/gatherers/clusterconfig/gather_most_recent_metrics.go index 1d689b58c..7f3bbe5bc 100644 --- a/pkg/gatherers/clusterconfig/gather_most_recent_metrics.go +++ b/pkg/gatherers/clusterconfig/gather_most_recent_metrics.go @@ -2,25 +2,14 @@ package clusterconfig import ( "context" - "fmt" - "io" "k8s.io/client-go/rest" "k8s.io/klog/v2" "github.com/openshift/insights-operator/pkg/record" - "github.com/openshift/insights-operator/pkg/utils" "github.com/openshift/insights-operator/pkg/utils/marshal" ) -const ( - // metricsAlertsLinesLimit is the maximal number of lines read from monitoring Prometheus - // 500 KiB of alerts is limit, one alert line has typically 450 bytes => 1137 lines. - // This number has been rounded to 1000 for simplicity. - // Formerly, the `500 * 1024 / 450` expression was used instead. - metricsAlertsLinesLimit = 1000 -) - // GatherMostRecentMetrics Collects cluster Federated Monitoring metrics. // // The GET REST query to URL /federate @@ -34,7 +23,6 @@ const ( // - `console_helm_uninstalls_total` // - `etcd_server_slow_apply_total` // - `etcd_server_slow_read_indexes_total` -// - followed by at most 1000 lines of `ALERTS` metric // // ### API Reference // None @@ -69,6 +57,7 @@ const ( // - `etcd_server_slow_apply_total` introduced in version 4.16+ // - `etcd_server_slow_read_indexes_total` introduced in version 4.16+ // - `haproxy_exporter_server_threshold` introduced in version 4.17+ +// - `ALERTS` removed in version 4.17+ func (g *Gatherer) GatherMostRecentMetrics(ctx context.Context) ([]record.Record, []error) { metricsRESTClient, err := rest.RESTClientFor(g.metricsGatherKubeConfig) if err != nil { @@ -99,32 +88,6 @@ func gatherMostRecentMetrics(ctx context.Context, metricsClient rest.Interface) return nil, []error{err} } - rsp, err := metricsClient.Get().AbsPath("federate"). - Param("match[]", "ALERTS"). - Stream(ctx) - if err != nil { - klog.Errorf("Unable to retrieve most recent alerts from metrics: %v", err) - return nil, []error{err} - } - r := utils.NewLineLimitReader(rsp, metricsAlertsLinesLimit) - alerts, err := io.ReadAll(r) - if err != nil && err != io.EOF { - klog.Errorf("Unable to read most recent alerts from metrics: %v", err) - return nil, []error{err} - } - - remainingAlertLines, err := utils.CountLines(rsp) - if err != nil && err != io.EOF { - klog.Errorf("Unable to count truncated lines of alerts metric: %v", err) - return nil, []error{err} - } - totalAlertCount := r.GetTotalLinesRead() + remainingAlertLines - - // # ALERTS / - // The total number of alerts will typically be greater than the true number of alerts by 2 - // because the `# TYPE ALERTS untyped` header and the final empty line are counter in. - data = append(data, []byte(fmt.Sprintf("# ALERTS %d/%d\n", totalAlertCount, metricsAlertsLinesLimit))...) - data = append(data, alerts...) records := []record.Record{ {Name: "config/metrics", Item: marshal.RawByte(data), AlwaysStored: true}, } diff --git a/pkg/gatherers/clusterconfig/gather_most_recent_metrics_test.go b/pkg/gatherers/clusterconfig/gather_most_recent_metrics_test.go index 7ba677280..df8600c80 100644 --- a/pkg/gatherers/clusterconfig/gather_most_recent_metrics_test.go +++ b/pkg/gatherers/clusterconfig/gather_most_recent_metrics_test.go @@ -54,10 +54,9 @@ func Test_gatherMostRecentMetrics(t *testing.T) { metricsClient: &mockMostRecentMetricsClient{data: []byte(`test`)}, wantRecords: []record.Record{ { - Name: "config/metrics", - Item: marshal.RawByte(`test# ALERTS 1/1000 -test`), + Name: "config/metrics", AlwaysStored: true, + Item: marshal.RawByte(`test`), }, }, wantErrors: nil,