@@ -16,19 +16,32 @@ package manifests
16
16
17
17
import (
18
18
"bytes"
19
+ "context"
19
20
"encoding/json"
20
21
"fmt"
21
22
"io"
23
+ "math"
22
24
23
25
configv1 "github.com/openshift/api/config/v1"
24
26
monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
27
+ poperator "github.com/prometheus-operator/prometheus-operator/pkg/operator"
25
28
v1 "k8s.io/api/core/v1"
26
29
k8syaml "k8s.io/apimachinery/pkg/util/yaml"
27
30
auditv1 "k8s.io/apiserver/pkg/apis/audit/v1"
31
+ "k8s.io/klog/v2"
28
32
)
29
33
30
34
const (
	// DefaultRetentionValue is the default retention value, expressed as a
	// duration string.
	DefaultRetentionValue = "15d"

	// minimalSizeLimit is the lower bound, in bytes, for the automatically
	// calculated body size limit of scrape responses.
	// Assumptions: one node runs on average 110 pods, each pod exposes 400
	// metrics, and each metric takes on average 250 bytes.
	// A 1.5x safety margin is applied, which gives 16,500,000 bytes (16MB once
	// rounded up to a whole number of MB).
	// Note that this is an untyped floating-point constant with an integral value.
	minimalSizeLimit = 1.5 * 110 * 400 * 250

	// automaticBodySizeLimit is the value of
	// PrometheusK8sConfig.EnforcedBodySizeLimit (JSON key "enforcedBodySizeLimit")
	// meaning the limit will be automatically calculated based on cluster capacity.
	automaticBodySizeLimit = "automatic"
)
33
46
34
47
type Config struct {
@@ -185,6 +198,12 @@ type PrometheusK8sConfig struct {
185
198
TelemetryMatches []string `json:"-"`
186
199
AlertmanagerConfigs []AdditionalAlertmanagerConfig `json:"additionalAlertmanagerConfigs"`
187
200
QueryLogFile string `json:"queryLogFile"`
201
+ /* EnforcedBodySizeLimit accepts 3 kinds of values:
202
+ * 1. empty value: no limit
203
+ * 2. a value in Prometheus size format, e.g. "64MB"
204
+ * 3. string "automatic", which means the limit will be automatically calculated based on cluster capacity.
205
+ */
206
+ EnforcedBodySizeLimit string `json:"enforcedBodySizeLimit,omitempty"`
188
207
}
189
208
190
209
type AdditionalAlertmanagerConfig struct {
@@ -327,7 +346,6 @@ func NewConfig(content io.Reader) (*Config, error) {
327
346
res := & c
328
347
res .applyDefaults ()
329
348
c .UserWorkloadConfiguration = NewDefaultUserWorkloadMonitoringConfig ()
330
-
331
349
return res , nil
332
350
}
333
351
@@ -474,6 +492,43 @@ func (c *Config) NoProxy() string {
474
492
return c .ClusterMonitoringConfiguration .HTTPConfig .NoProxy
475
493
}
476
494
495
// PodCapacityReader is implemented by types that can report the maximum
// number of pods that can be scheduled in a cluster.
type PodCapacityReader interface {
	// PodCapacity returns the pod capacity, or an error if it cannot be
	// determined.
	PodCapacity(ctx context.Context) (int, error)
}
499
+
500
+ func (c * Config ) LoadEnforcedBodySizeLimit (pcr PodCapacityReader , ctx context.Context ) error {
501
+ if c .ClusterMonitoringConfiguration .PrometheusK8sConfig .EnforcedBodySizeLimit == "" {
502
+ return nil
503
+ }
504
+
505
+ if c .ClusterMonitoringConfiguration .PrometheusK8sConfig .EnforcedBodySizeLimit == automaticBodySizeLimit {
506
+ podCapacity , err := pcr .PodCapacity (ctx )
507
+ if err != nil {
508
+ return fmt .Errorf ("error fetching pod capacity: %v" , err )
509
+ }
510
+ c .ClusterMonitoringConfiguration .PrometheusK8sConfig .EnforcedBodySizeLimit = calculateBodySizeLimit (podCapacity )
511
+ return nil
512
+ }
513
+
514
+ return poperator .ValidateSizeField (c .ClusterMonitoringConfiguration .PrometheusK8sConfig .EnforcedBodySizeLimit )
515
+
516
+ }
517
+
518
+ func calculateBodySizeLimit (podCapacity int ) string {
519
+ const samplesPerPod = 400 // 400 samples per pod
520
+ const sizePerSample = 200 // 200 Bytes
521
+ const loadFactorPercentage = 100 // assume 100% of the maximum pods capacity per node is used
522
+
523
+ bodySize := loadFactorPercentage * podCapacity / 100 * samplesPerPod * sizePerSample
524
+ if bodySize < minimalSizeLimit {
525
+ klog .Infof ("Calculated scrape body size limit %v is too small, using default value %v instead" , bodySize , minimalSizeLimit )
526
+ bodySize = minimalSizeLimit
527
+ }
528
+
529
+ return fmt .Sprintf ("%dMB" , int (math .Ceil (float64 (bodySize )/ (1024 * 1024 ))))
530
+ }
531
+
477
532
func NewConfigFromString (content string ) (* Config , error ) {
478
533
if content == "" {
479
534
return NewDefaultConfig (), nil
0 commit comments