add prometheusK8s.enforcedBodySizeLimit to CMO ConfigMap, limiting the

raptorsun · raptorsun · commit 86b83cb26708 · 2022-04-22T18:46:09.000+02:00
body size when scraping metrics.
An empty value or 0 means no body size limit; "automatic" means the
limit is deduced automatically.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -12,6 +12,7 @@
 - [#1638](https://github.com/openshift/cluster-monitoring-operator/pull/1638) Expose sigv4 setting to Prometheus remoteWrite
 - [#1579](https://github.com/openshift/cluster-monitoring-operator/pull/1579) Expose retention size settings for Platform Prometheus
 - [#1630](https://github.com/openshift/cluster-monitoring-operator/pull/1630) Expose retention size settings for UWM Prometheus
+- [#1467](https://github.com/openshift/cluster-monitoring-operator/pull/1467) Add bodysize limit for metric scraping
 
 ## 4.10
 
diff --git a/pkg/client/client.go b/pkg/client/client.go
@@ -1524,6 +1524,24 @@ func (c *Client) DeleteRole(ctx context.Context, role *rbacv1.Role) error {
 	return err
 }
 
+// PodCapacity returns the sum of the pod capacity reported in the status of
+// every node listed from the API server. Nodes whose pod capacity cannot be
+// converted to an int64 are skipped with a warning. An error is returned only
+// when listing the nodes fails.
+func (c *Client) PodCapacity(ctx context.Context) (int, error) {
+	nodes, err := c.kclient.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
+	if err != nil {
+		return 0, err
+	}
+	var podCapacityTotal int64
+	// Index into the slice to avoid copying each Node on every iteration.
+	for i := range nodes.Items {
+		node := &nodes.Items[i]
+		podsCount, succeeded := node.Status.Capacity.Pods().AsInt64()
+		if !succeeded {
+			// err is always nil at this point, so it carries no information worth logging.
+			klog.Warningf("Cannot get pod capacity from node: %s", node.Name)
+			continue
+		}
+		podCapacityTotal += podsCount
+	}
+
+	return int(podCapacityTotal), nil
+}
+
 // mergeMetadata merges labels and annotations from `existing` map into `required` one where `required` has precedence
 // over `existing` keys and values. Additionally function performs filtering of labels and annotations from `exiting` map
 // where keys starting from string defined in `metadataPrefix` are deleted. This prevents issues with preserving stale
diff --git a/pkg/client/client_test.go b/pkg/client/client_test.go
@@ -25,6 +25,7 @@ import (
 	appsv1 "k8s.io/api/apps/v1"
 	v1 "k8s.io/api/core/v1"
 	rbacv1 "k8s.io/api/rbac/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 
 	monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
@@ -1836,3 +1837,50 @@ func TestCreateOrUpdateValidatingWebhookConfiguration(t *testing.T) {
 		})
 	}
 }
+
+// TestPodCapacity verifies that Client.PodCapacity adds up the pod capacity
+// of all nodes known to the (fake) API server.
+func TestPodCapacity(t *testing.T) {
+	ctx := context.Background()
+	node1 := v1.Node{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: "node1",
+		},
+		Status: v1.NodeStatus{
+			Capacity: v1.ResourceList{
+				v1.ResourcePods: resource.MustParse("100"),
+			},
+		},
+	}
+	node2 := v1.Node{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: "node2",
+		},
+		Status: v1.NodeStatus{
+			Capacity: v1.ResourceList{
+				v1.ResourcePods: resource.MustParse("50"),
+			},
+		},
+	}
+	nodeList := v1.NodeList{
+		Items: []v1.Node{
+			node1,
+			node2,
+		},
+	}
+	// The subtest parameter deliberately shadows the parent t so that failures
+	// are reported on the subtest, from the goroutine that runs it.
+	t.Run("sum 2 nodes pod capacity", func(t *testing.T) {
+		c := Client{
+			kclient: fake.NewSimpleClientset(nodeList.DeepCopy()),
+		}
+
+		podCapacity, err := c.PodCapacity(ctx)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		if podCapacity != 150 {
+			t.Fatalf("expected pods capacity 150, got %d", podCapacity)
+		}
+	})
+}
diff --git a/pkg/manifests/config.go b/pkg/manifests/config.go
@@ -16,19 +16,32 @@ package manifests
 
 import (
 	"bytes"
+	"context"
 	"encoding/json"
 	"fmt"
 	"io"
+	"math"
 
 	configv1 "github.com/openshift/api/config/v1"
 	monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
+	poperator "github.com/prometheus-operator/prometheus-operator/pkg/operator"
 	v1 "k8s.io/api/core/v1"
 	k8syaml "k8s.io/apimachinery/pkg/util/yaml"
 	auditv1 "k8s.io/apiserver/pkg/apis/audit/v1"
+	"k8s.io/klog/v2"
 )
 
 const (
 	DefaultRetentionValue = "15d"
+
+	// Lower bound, in bytes, for the automatically calculated scrape body size limit.
+	// Assumptions: one node runs on average 110 pods, each pod exposes 400 metrics, each metric takes on average 250 bytes.
+	// A 1.5x safety margin gives 16,500,000 bytes, which rounds up to 16MB.
+	minimalSizeLimit = 1.5 * 110 * 400 * 250
+
+	// A value of prometheusK8s.enforcedBodySizeLimit,
+	// meaning the limit will be automatically calculated based on cluster capacity.
+	automaticBodySizeLimit = "automatic"
 )
 
 type Config struct {
@@ -185,6 +198,12 @@ type PrometheusK8sConfig struct {
 	TelemetryMatches    []string                             `json:"-"`
 	AlertmanagerConfigs []AdditionalAlertmanagerConfig       `json:"additionalAlertmanagerConfigs"`
 	QueryLogFile        string                               `json:"queryLogFile"`
+	/* EnforcedBodySizeLimit accepts 3 kinds of values:
+	 * 1. empty value: no limit
+	 * 2. a value in Prometheus size format, e.g. "64MB"
+	 * 3. string "automatic", which means the limit will be automatically calculated based on cluster capacity.
+	 */
+	EnforcedBodySizeLimit string `json:"enforcedBodySizeLimit,omitempty"`
 }
 
 type AdditionalAlertmanagerConfig struct {
@@ -327,7 +346,6 @@ func NewConfig(content io.Reader) (*Config, error) {
 	res := &c
 	res.applyDefaults()
 	c.UserWorkloadConfiguration = NewDefaultUserWorkloadMonitoringConfig()
-
 	return res, nil
 }
 
@@ -474,6 +492,43 @@ func (c *Config) NoProxy() string {
 	return c.ClusterMonitoringConfiguration.HTTPConfig.NoProxy
 }
 
+// PodCapacityReader reports the maximum number of pods that can be scheduled in a cluster.
+type PodCapacityReader interface {
+	PodCapacity(context.Context) (int, error)
+}
+
+// LoadEnforcedBodySizeLimit resolves and validates
+// PrometheusK8sConfig.EnforcedBodySizeLimit in place:
+//   - an empty value is left untouched;
+//   - "automatic" is replaced by a limit computed from the pod capacity
+//     reported by pcr;
+//   - any other value is checked with poperator.ValidateSizeField.
+//
+// It returns an error when the pod capacity cannot be fetched or when
+// ValidateSizeField rejects the configured value. The pod capacity error is
+// wrapped so callers can inspect it with errors.Is / errors.As.
+func (c *Config) LoadEnforcedBodySizeLimit(pcr PodCapacityReader, ctx context.Context) error {
+	// Pointer to the field so that the "automatic" case can overwrite it.
+	limit := &c.ClusterMonitoringConfiguration.PrometheusK8sConfig.EnforcedBodySizeLimit
+
+	switch *limit {
+	case "":
+		return nil
+	case automaticBodySizeLimit:
+		podCapacity, err := pcr.PodCapacity(ctx)
+		if err != nil {
+			return fmt.Errorf("error fetching pod capacity: %w", err)
+		}
+		*limit = calculateBodySizeLimit(podCapacity)
+		return nil
+	default:
+		return poperator.ValidateSizeField(*limit)
+	}
+}
+
+// calculateBodySizeLimit derives a scrape body size limit from the cluster
+// pod capacity and returns it as a whole number followed by "MB", rounded up.
+// The computed byte count is never allowed to fall below minimalSizeLimit.
+//
+// NOTE(review): sizePerSample is 200 bytes here while the comment on
+// minimalSizeLimit assumes 250 bytes per metric - confirm which is intended.
+func calculateBodySizeLimit(podCapacity int) string {
+	const samplesPerPod = 400        // 400 samples per pod
+	const sizePerSample = 200        // 200 Bytes
+	const loadFactorPercentage = 100 // assume 100% of the maximum pods capacity per node is used
+	// minimalSizeLimit is an untyped floating-point constant; convert it once so
+	// that it is compared, assigned and logged as a plain byte count instead of
+	// being printed in exponent notation.
+	const minBodySize = int(minimalSizeLimit)
+
+	bodySize := loadFactorPercentage * podCapacity / 100 * samplesPerPod * sizePerSample
+	if bodySize < minBodySize {
+		klog.Infof("Calculated scrape body size limit %d is too small, using default value %d instead", bodySize, minBodySize)
+		bodySize = minBodySize
+	}
+
+	return fmt.Sprintf("%dMB", int(math.Ceil(float64(bodySize)/(1024*1024))))
+}
+
 func NewConfigFromString(content string) (*Config, error) {
 	if content == "" {
 		return NewDefaultConfig(), nil
diff --git a/pkg/manifests/config_test.go b/pkg/manifests/config_test.go
@@ -16,6 +16,8 @@ package manifests
 
 import (
 	"bytes"
+	"context"
+	"errors"
 	"io/ioutil"
 	"os"
 	"testing"
@@ -192,7 +194,7 @@ func TestHttpProxyConfig(t *testing.T) {
 	conf := `http:
   httpProxy: http://test.com
   httpsProxy: https://test.com
-  noProxy: https://example.com	
+  noProxy: https://example.com
 `
 
 	c, err := NewConfig(bytes.NewBufferString(conf))
@@ -234,3 +236,100 @@ func TestHttpProxyConfig(t *testing.T) {
 		}
 	}
 }
+
+// fakePodCapacity is a PodCapacityReader stub that hands back canned values.
+type fakePodCapacity struct {
+	capacity int
+	err      error
+}
+
+// PodCapacity returns the configured capacity and error; the context is ignored.
+func (f *fakePodCapacity) PodCapacity(_ context.Context) (int, error) {
+	return f.capacity, f.err
+}
+
+// TestLoadEnforcedBodySizeLimit exercises Config.LoadEnforcedBodySizeLimit
+// with empty, explicit, invalid and "automatic" values, using stubbed pod
+// capacities.
+func TestLoadEnforcedBodySizeLimit(t *testing.T) {
+	smallCluster := fakePodCapacity{capacity: 10, err: nil}
+	largeCluster := fakePodCapacity{capacity: 1000, err: nil}
+	failingCluster := fakePodCapacity{capacity: 1000, err: errors.New("error")}
+
+	for _, tt := range []struct {
+		name                string
+		config              string
+		expectBodySizeLimit string
+		expectError         bool
+		pcr                 PodCapacityReader
+	}{
+		{
+			name:                "empty config",
+			config:              "",
+			expectBodySizeLimit: "",
+			expectError:         false,
+			pcr:                 &smallCluster,
+		},
+		{
+			name:                "disable body size limit",
+			config:              `{"prometheusK8s": {"enforcedBodySizeLimit": "0"}}`,
+			expectBodySizeLimit: "0",
+			expectError:         false,
+			pcr:                 &smallCluster,
+		},
+		{
+			name:                "normal size format",
+			config:              `{"prometheusK8s": {"enforcedBodySizeLimit": "10KB"}}`,
+			expectBodySizeLimit: "10KB",
+			expectError:         false,
+			pcr:                 &smallCluster,
+		},
+		{
+			name:                "invalid size format",
+			config:              `{"prometheusK8s": {"enforcedBodySizeLimit": "10EUR"}}`,
+			expectBodySizeLimit: "",
+			expectError:         true,
+			pcr:                 &smallCluster,
+		},
+		{
+			name:                "automatic deduced limit: error when getting pods capacity",
+			config:              `{"prometheusK8s": {"enforcedBodySizeLimit": "automatic"}}`,
+			expectBodySizeLimit: "",
+			expectError:         true,
+			pcr:                 &failingCluster,
+		},
+		{
+			name:                "automatically deduced limit: minimal 16MB",
+			config:              `{"prometheusK8s": {"enforcedBodySizeLimit": "automatic"}}`,
+			expectBodySizeLimit: "16MB",
+			expectError:         false,
+			pcr:                 &smallCluster,
+		},
+		{
+			name:                "automatically deduced limit: larger than minimal 16MB",
+			config:              `{"prometheusK8s": {"enforcedBodySizeLimit": "automatic"}}`,
+			expectBodySizeLimit: "77MB",
+			expectError:         false,
+			pcr:                 &largeCluster,
+		},
+	} {
+		t.Run(tt.name, func(t *testing.T) {
+			c, err := NewConfigFromString(tt.config)
+			if err != nil {
+				// Surface the underlying error so a broken fixture is easy to diagnose.
+				t.Fatalf("config parsing error: %v", err)
+			}
+
+			err = c.LoadEnforcedBodySizeLimit(tt.pcr, context.Background())
+			if tt.expectError {
+				if err == nil {
+					t.Fatalf("expected error, got nil")
+				}
+				return
+			}
+			if err != nil {
+				t.Fatalf("expected no error, got error %v", err)
+			}
+
+			if c.ClusterMonitoringConfiguration.PrometheusK8sConfig.EnforcedBodySizeLimit != tt.expectBodySizeLimit {
+				t.Fatalf("incorrect EnforcedBodySizeLimit is set: got %s, expected %s",
+					c.ClusterMonitoringConfiguration.PrometheusK8sConfig.EnforcedBodySizeLimit,
+					tt.expectBodySizeLimit)
+			}
+		})
+	}
+}
diff --git a/pkg/manifests/manifests.go b/pkg/manifests/manifests.go
@@ -1657,6 +1657,10 @@ func (f *Factory) PrometheusK8s(grpcTLS *v1.Secret, trustedCABundleCM *v1.Config
 		p.Spec.Secrets = append(p.Spec.Secrets, getAdditionalAlertmanagerSecrets(f.config.ClusterMonitoringConfiguration.PrometheusK8sConfig.AlertmanagerConfigs)...)
 	}
 
+	if f.config.ClusterMonitoringConfiguration.PrometheusK8sConfig.EnforcedBodySizeLimit != "" {
+		p.Spec.EnforcedBodySizeLimit = f.config.ClusterMonitoringConfiguration.PrometheusK8sConfig.EnforcedBodySizeLimit
+	}
+
 	return p, nil
 }
 
diff --git a/pkg/manifests/manifests_test.go b/pkg/manifests/manifests_test.go
@@ -15,6 +15,7 @@
 package manifests
 
 import (
+	"context"
 	"errors"
 	"fmt"
 	"net/url"
@@ -1468,8 +1469,13 @@ ingress:
 		t.Fatal("Prometheus image is not configured correctly")
 	}
 
+	if p.Spec.EnforcedBodySizeLimit != "" {
+		t.Fatal("EnforcedBodySizeLimit is not set to empty by default")
+	}
+
 	kubeRbacProxyTLSCipherSuitesArg := ""
 	kubeRbacProxyMinTLSVersionArg := ""
+
 	for _, container := range p.Spec.Containers {
 		switch container.Name {
 		case "prometheus-proxy":
@@ -1744,6 +1750,43 @@ func TestPrometheusRetentionConfigs(t *testing.T) {
 	}
 }
 
+// TestPrometheusK8sConfigurationBodySizeLimit checks that an explicit
+// enforcedBodySizeLimit from the config ends up in the generated Prometheus
+// spec once LoadEnforcedBodySizeLimit has been called.
+func TestPrometheusK8sConfigurationBodySizeLimit(t *testing.T) {
+	cfg, err := NewConfigFromString(`
+prometheusK8s:
+    enforcedBodySizeLimit: "10MB"
+  `)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// The limit is resolved by an explicit load step, not while parsing the config.
+	capacityReader := &fakePodCapacity{capacity: 1000, err: nil}
+	if loadErr := cfg.LoadEnforcedBodySizeLimit(capacityReader, context.Background()); loadErr != nil {
+		t.Fatal(loadErr)
+	}
+
+	factory := NewFactory("openshift-monitoring", "openshift-user-workload-monitoring", cfg, defaultInfrastructureReader(), &fakeProxyReader{}, NewAssets(assetsPath), &APIServerConfig{}, nil)
+	grpcSecret := &v1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "foo"}}
+	caBundle := &v1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "foo"}}
+
+	prom, err := factory.PrometheusK8s(grpcSecret, caBundle)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if got := prom.Spec.EnforcedBodySizeLimit; got != "10MB" {
+		t.Fatalf("EnforcedBodySizeLimit is not configured correctly, expected 10MB but got %v", got)
+	}
+}
 func TestPrometheusK8sAdditionalAlertManagerConfigsSecret(t *testing.T) {
 	testCases := []struct {
 		name           string
diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go
@@ -829,6 +829,12 @@ func (o *Operator) Config(ctx context.Context, key string) (*manifests.Config, e
 	}
 	o.userWorkloadEnabled = *c.ClusterMonitoringConfiguration.UserWorkloadEnabled
 
+	err = c.LoadEnforcedBodySizeLimit(o.client, ctx)
+	if err != nil {
+		c.ClusterMonitoringConfiguration.PrometheusK8sConfig.EnforcedBodySizeLimit = ""
+		klog.Warningf("Error loading enforced body size limit, no body size limit will be enforced: %v", err)
+	}
+
 	// Only fetch the token and cluster ID if they have not been specified in the config.
 	if c.ClusterMonitoringConfiguration.TelemeterClientConfig.ClusterID == "" || c.ClusterMonitoringConfiguration.TelemeterClientConfig.Token == "" {
 		err := c.LoadClusterID(func() (*configv1.ClusterVersion, error) {
diff --git a/test/e2e/prometheus_test.go b/test/e2e/prometheus_test.go

Original file line number	Diff line number	Diff line change
`@@ -1657,6 +1657,10 @@ func (f Factory) PrometheusK8s(grpcTLS v1.Secret, trustedCABundleCM *v1.Config`
`1657`	`1657`	`p.Spec.Secrets = append(p.Spec.Secrets, getAdditionalAlertmanagerSecrets(f.config.ClusterMonitoringConfiguration.PrometheusK8sConfig.AlertmanagerConfigs)...)`
`1658`	`1658`	`}`
`1659`	`1659`
	`1660`	`+ if f.config.ClusterMonitoringConfiguration.PrometheusK8sConfig.EnforcedBodySizeLimit != "" {`
	`1661`	`+ p.Spec.EnforcedBodySizeLimit = f.config.ClusterMonitoringConfiguration.PrometheusK8sConfig.EnforcedBodySizeLimit`
	`1662`	`+ }`
	`1663`	`+`
`1660`	`1664`	`return p, nil`
`1661`	`1665`	`}`
`1662`	`1666`