@@ -16,19 +16,33 @@ package manifests
16
16
17
17
import (
18
18
"bytes"
19
+ "context"
19
20
"encoding/json"
20
21
"fmt"
21
22
"io"
23
+ "math"
22
24
23
25
configv1 "github.com/openshift/api/config/v1"
24
26
monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
27
+ poperator "github.com/prometheus-operator/prometheus-operator/pkg/operator"
25
28
v1 "k8s.io/api/core/v1"
26
29
k8syaml "k8s.io/apimachinery/pkg/util/yaml"
27
30
auditv1 "k8s.io/apiserver/pkg/apis/audit/v1"
31
+ "k8s.io/klog/v2"
28
32
)
29
33
30
34
const (
31
35
DefaultRetentionValue = "15d"
36
+
37
+ // Limit the body size from scrape queries
38
+ // Assumptions: one node has on average 110 pods, each pod exposes 400 metrics, and each metric takes on average 250 bytes.
39
+ // 1.5x the size for a safe margin,
40
+ // and a minimal HA cluster requires 3 nodes. This comes to about 47.2 MB (49,500,000 bytes).
41
+ minimalSizeLimit = 3 * 1.5 * 110 * 400 * 250
42
+
43
+ // A value of PrometheusK8sConfig.EnforcedBodySizeLimit,
44
+ // meaning the limit will be automatically calculated based on cluster capacity.
45
+ automaticBodySizeLimit = "automatic"
32
46
)
33
47
34
48
type Config struct {
@@ -186,6 +200,12 @@ type PrometheusK8sConfig struct {
186
200
TelemetryMatches []string `json:"-"`
187
201
AlertmanagerConfigs []AdditionalAlertmanagerConfig `json:"additionalAlertmanagerConfigs"`
188
202
QueryLogFile string `json:"queryLogFile"`
203
+ /* EnforcedBodySizeLimit accepts 3 kinds of values:
204
+ * 1. empty value: no limit
205
+ * 2. a value in Prometheus size format, e.g. "64MB"
206
+ * 3. string "automatic", which means the limit will be automatically calculated based on cluster capacity.
207
+ */
208
+ EnforcedBodySizeLimit string `json:"enforcedBodySizeLimit,omitempty"`
189
209
}
190
210
191
211
type AdditionalAlertmanagerConfig struct {
@@ -329,7 +349,6 @@ func NewConfig(content io.Reader) (*Config, error) {
329
349
res := & c
330
350
res .applyDefaults ()
331
351
c .UserWorkloadConfiguration = NewDefaultUserWorkloadMonitoringConfig ()
332
-
333
352
return res , nil
334
353
}
335
354
@@ -477,6 +496,42 @@ func (c *Config) NoProxy() string {
477
496
return c .ClusterMonitoringConfiguration .HTTPConfig .NoProxy
478
497
}
479
498
499
// PodCapacityReader is implemented by anything able to report the maximum
// number of pods that can be scheduled in a cluster.
type PodCapacityReader interface {
	// PodCapacity returns the cluster's pod capacity, or an error if the
	// capacity could not be determined.
	PodCapacity(ctx context.Context) (int, error)
}
503
+
504
+ func (c * Config ) LoadEnforcedBodySizeLimit (pcr PodCapacityReader , ctx context.Context ) error {
505
+ if c .ClusterMonitoringConfiguration .PrometheusK8sConfig .EnforcedBodySizeLimit == "" {
506
+ return nil
507
+ }
508
+
509
+ if c .ClusterMonitoringConfiguration .PrometheusK8sConfig .EnforcedBodySizeLimit == automaticBodySizeLimit {
510
+ podCapacity , err := pcr .PodCapacity (ctx )
511
+ if err != nil {
512
+ return fmt .Errorf ("error fetching pod capacity: %v" , err )
513
+ }
514
+ c .ClusterMonitoringConfiguration .PrometheusK8sConfig .EnforcedBodySizeLimit = calculateBodySizeLimit (podCapacity )
515
+ return nil
516
+ }
517
+
518
+ return poperator .ValidateSizeField (c .ClusterMonitoringConfiguration .PrometheusK8sConfig .EnforcedBodySizeLimit )
519
+
520
+ }
521
+
522
+ func calculateBodySizeLimit (podCapacity int ) string {
523
+ const samplesPerPod = 400 // 400 samples per pod
524
+ const sizePerSample = 200 // 200 Bytes
525
+
526
+ bodySize := podCapacity * samplesPerPod * sizePerSample
527
+ if bodySize < minimalSizeLimit {
528
+ klog .Infof ("Calculated scrape body size limit %v is too small, using default value %v instead" , bodySize , minimalSizeLimit )
529
+ bodySize = minimalSizeLimit
530
+ }
531
+
532
+ return fmt .Sprintf ("%dMB" , int (math .Ceil (float64 (bodySize )/ (1024 * 1024 ))))
533
+ }
534
+
480
535
func NewConfigFromString (content string ) (* Config , error ) {
481
536
if content == "" {
482
537
return NewDefaultConfig (), nil
0 commit comments