Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Prometheus Collector pattern #637

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ require (
github.com/openshift/client-go v0.0.0-20220525160904-9e1acff93e4a
github.com/openshift/installer v0.9.0-master.0.20191219195746-103098955ced
github.com/openshift/library-go v0.0.0-20220525173854-9b950a41acdc
github.com/prometheus/common v0.32.1
github.com/prometheus/client_golang v1.12.1
github.com/spf13/cobra v1.4.0
github.com/spf13/pflag v1.0.5
github.com/stretchr/testify v1.7.0
Expand Down
42 changes: 32 additions & 10 deletions manifests/06-deployment-ibm-cloud-managed.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,6 @@ spec:
labels:
app: insights-operator
spec:
securityContext:
runAsNonRoot: true
seccompProfile:
type: RuntimeDefault
containers:
- args:
- start
Expand All @@ -42,17 +38,18 @@ spec:
value: 0.0.1-snapshot
image: quay.io/openshift/origin-insights-operator:latest
name: insights-operator
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop: ["ALL"]
ports:
- containerPort: 8443
name: https
- containerPort: 8080
name: metrics
resources:
requests:
cpu: 10m
memory: 30Mi
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
terminationMessagePolicy: FallbackToLogsOnError
volumeMounts:
- mountPath: /var/lib/insights-operator
Expand All @@ -65,9 +62,34 @@ spec:
readOnly: true
- mountPath: /var/run/secrets/serving-cert
name: serving-cert
- args:
- --logtostderr
- --secure-listen-address=:8443
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
- --upstream=http://127.0.0.1:8080/
- --tls-cert-file=/etc/tls/private/tls.crt
- --tls-private-key-file=/etc/tls/private/tls.key
image: quay.io/openshift/origin-kube-rbac-proxy:latest
name: kube-rbac-proxy
ports:
- containerPort: 8443
name: https
resources:
requests:
cpu: 1m
memory: 20Mi
terminationMessagePolicy: FallbackToLogsOnError
volumeMounts:
- mountPath: /etc/tls/private
name: serving-cert
readOnly: false
nodeSelector:
beta.kubernetes.io/os: linux
priorityClassName: system-cluster-critical
securityContext:
runAsNonRoot: true
seccompProfile:
type: RuntimeDefault
serviceAccountName: operator
tolerations:
- effect: NoSchedule
Expand Down
25 changes: 23 additions & 2 deletions manifests/06-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ spec:
- mountPath: /var/run/secrets/serving-cert
name: serving-cert
ports:
- containerPort: 8443
name: https
- containerPort: 8080
name: metrics
resources:
requests:
cpu: 10m
Expand All @@ -99,3 +99,24 @@ spec:
- start
- -v=4
- --config=/etc/insights-operator/server.yaml
- name: kube-rbac-proxy
image: quay.io/openshift/origin-kube-rbac-proxy:latest
args:
- --logtostderr
- --secure-listen-address=:8443
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305
- --upstream=http://127.0.0.1:8080/
- --tls-cert-file=/etc/tls/private/tls.crt
- --tls-private-key-file=/etc/tls/private/tls.key
ports:
- containerPort: 8443
name: https
resources:
requests:
cpu: 1m
memory: 20Mi
terminationMessagePolicy: FallbackToLogsOnError
volumeMounts:
- mountPath: /etc/tls/private
name: serving-cert
readOnly: false
4 changes: 4 additions & 0 deletions manifests/image-references
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,7 @@ spec:
from:
kind: "DockerImage"
name: "quay.io/openshift/origin-insights-operator:latest"
- name: kube-rbac-proxy
from:
kind: "DockerImage"
name: "quay.io/openshift/origin-kube-rbac-proxy:latest"
1 change: 0 additions & 1 deletion pkg/cmd/start/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,6 @@ func runOperator(operator *controller.Operator, cfg *controllercmd.ControllerCom
builder := controllercmd.NewController("openshift-insights-operator", operator.Run).
WithKubeConfigFile(cmd.Flags().Lookup("kubeconfig").Value.String(), nil).
WithLeaderElection(operatorConfig.LeaderElection, "", "openshift-insights-operator-lock").
WithServer(operatorConfig.ServingInfo, operatorConfig.Authentication, operatorConfig.Authorization).
WithRestartOnChange(exitOnChangeReactorCh, startingFileContent, observedFiles...)
if err := builder.Run(ctx2, unstructured); err != nil {
klog.Error(err)
Expand Down
4 changes: 4 additions & 0 deletions pkg/controller/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/openshift/insights-operator/pkg/controller/periodic"
"github.com/openshift/insights-operator/pkg/controller/status"
"github.com/openshift/insights-operator/pkg/gather"
"github.com/openshift/insights-operator/pkg/insights"
"github.com/openshift/insights-operator/pkg/insights/insightsclient"
"github.com/openshift/insights-operator/pkg/insights/insightsreport"
"github.com/openshift/insights-operator/pkg/insights/insightsuploader"
Expand Down Expand Up @@ -53,6 +54,9 @@ func (s *Operator) Run(ctx context.Context, controller *controllercmd.Controller
}
s.Controller = cont

// Start the Prometheus metrics server.
go insights.RunMetricsServer()

// these are operator clients
kubeClient, err := kubernetes.NewForConfig(controller.ProtoKubeConfig)
if err != nil {
Expand Down
23 changes: 5 additions & 18 deletions pkg/insights/insightsclient/insightsclient.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@ import (
"k8s.io/client-go/pkg/version"
"k8s.io/client-go/rest"
"k8s.io/client-go/transport"
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"

"k8s.io/klog/v2"

configv1 "github.com/openshift/api/config/v1"
configv1client "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1"
"github.com/openshift/insights-operator/pkg/insights"
"github.com/prometheus/client_golang/prometheus"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
apimachineryversion "k8s.io/apimachinery/pkg/version"
Expand Down Expand Up @@ -265,29 +265,16 @@ func (c *Client) createAndWriteMIMEHeader(source *Source, mw *multipart.Writer,
}

var (
counterRequestSend = metrics.NewCounterVec(&metrics.CounterOpts{
counterRequestSend = prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "insightsclient_request_send_total",
Help: "Tracks the number of metrics sends",
}, []string{"client", "status_code"})
counterRequestRecvReport = metrics.NewCounterVec(&metrics.CounterOpts{
counterRequestRecvReport = prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "insightsclient_request_recvreport_total",
Help: "Tracks the number of reports requested",
}, []string{"client", "status_code"})
)

func init() {
err := legacyregistry.Register(
counterRequestSend,
)
if err != nil {
fmt.Println(err)
}

err = legacyregistry.Register(
counterRequestRecvReport,
)
if err != nil {
fmt.Println(err)
}

insights.MustRegisterMetricCollectors(counterRequestSend, counterRequestRecvReport)
}
44 changes: 20 additions & 24 deletions pkg/insights/insightsreport/insightsreport.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@ import (
"time"

"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
"k8s.io/klog/v2"

"github.com/openshift/insights-operator/pkg/authorizer"
"github.com/openshift/insights-operator/pkg/config/configobserver"
"github.com/openshift/insights-operator/pkg/controllerstatus"
"github.com/openshift/insights-operator/pkg/insights"
"github.com/openshift/insights-operator/pkg/insights/insightsclient"
"github.com/prometheus/client_golang/prometheus"
)

// Controller gathers the report from Smart Proxy
Expand All @@ -38,10 +38,13 @@ type InsightsReporter interface {
ArchiveUploaded() <-chan struct{}
}

var (
const (
insightsLastGatherTimeName = "insightsclient_last_gather_time"
)

var (
// insightsStatus contains a metric with the latest report information
insightsStatus = metrics.NewGaugeVec(&metrics.GaugeOpts{
insightsStatus = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "health",
Subsystem: "statuses",
Name: "insights",
Expand All @@ -51,28 +54,11 @@ var (
retryThreshold = 2

// insightsLastGatherTime contains time of the last Insights data gathering
insightsLastGatherTime = metrics.NewGauge(&metrics.GaugeOpts{
Name: "insightsclient_last_gather_time",
insightsLastGatherTime = prometheus.NewGauge(prometheus.GaugeOpts{
Name: insightsLastGatherTimeName,
})
)

func init() {
err := legacyregistry.Register(insightsStatus)
if err != nil {
fmt.Println(err)
}
err = legacyregistry.Register(insightsLastGatherTime)
if err != nil {
fmt.Println(err)
}

insightsStatus.WithLabelValues("low").Set(float64(-1))
insightsStatus.WithLabelValues("moderate").Set(float64(-1))
insightsStatus.WithLabelValues("important").Set(float64(-1))
insightsStatus.WithLabelValues("critical").Set(float64(-1))
insightsStatus.WithLabelValues("total").Set(float64(-1))
}

// New initializes and returns a Gatherer
func New(client *insightsclient.Client, configurator configobserver.Configurator, reporter InsightsReporter) *Controller {
return &Controller{
Expand Down Expand Up @@ -286,8 +272,18 @@ func updateInsightsMetrics(report SmartProxyReport) {

t, err := time.Parse(time.RFC3339, string(report.Meta.GatheredAt))
if err != nil {
klog.Errorf("Metric %s not updated. Failed to parse time: %v", insightsLastGatherTime.Name, err)
klog.Errorf("Metric %s not updated. Failed to parse time: %v", insightsLastGatherTimeName, err)
return
}
insightsLastGatherTime.Set(float64(t.Unix()))
}

// init registers the Insights report gauges in the Insights metrics registry
// and then sets the insightsStatus series of each listed label value to -1.
func init() {
	insights.MustRegisterMetricCollectors(insightsStatus, insightsLastGatherTime)

	const initialValue = float64(-1)
	for _, label := range []string{"low", "moderate", "important", "critical", "total"} {
		insightsStatus.WithLabelValues(label).Set(initialValue)
	}
}
45 changes: 45 additions & 0 deletions pkg/insights/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package insights

import (
"net/http"
"time"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/collectors"
"github.com/prometheus/client_golang/prometheus/promhttp"
"k8s.io/klog/v2"
)

// insightsMetricsRegistry is the Prometheus registry that holds every Insights
// metric collector and is served by RunMetricsServer.
//
// It is created in the variable initializer rather than inside init(): Go
// initializes package-level variables before it runs any init() function of
// the package, so the registry is guaranteed to be non-nil regardless of which
// init() in this package calls MustRegisterMetricCollectors first.
var insightsMetricsRegistry = prometheus.NewRegistry()

// init registers the Go runtime collector and the process collector in the
// Insights metrics registry.
func init() {
	MustRegisterMetricCollectors(
		collectors.NewGoCollector(),
		collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}),
	)
}

// RunMetricsServer starts an HTTP server for the Insights metrics registry,
// exposing it under /metrics on port 8080.
// The server runs synchronously in an infinite loop, so this function never
// returns. When serving fails, the error is logged and the server is restarted
// after a one-minute sleep (to avoid spamming the log with the same error).
func RunMetricsServer() {
	mux := http.NewServeMux()
	mux.Handle("/metrics", promhttp.HandlerFor(insightsMetricsRegistry, promhttp.HandlerOpts{}))
	for {
		// Build a fresh server for every attempt so that each restart is
		// independent of the state left behind by the previous one.
		server := &http.Server{
			Addr:    ":8080",
			Handler: mux,
			// Bound the time a client may take to send its request headers, so
			// a stalled connection cannot be held open indefinitely.
			ReadHeaderTimeout: 10 * time.Second,
		}
		klog.Info("Starting the Prometheus metrics server")
		klog.Errorf("Unable to serve metrics: %v", server.ListenAndServe())
		time.Sleep(time.Minute)
	}
}

// MustRegisterMetricCollectors registers the given metric collectors (or
// metrics) in the Insights metrics registry. This function should be called
// from init() functions only, because it uses the MustRegister method, and
// therefore panics in case of an error.
//
// The parameter is deliberately not named "collectors", so that it does not
// shadow the imported prometheus/collectors package inside this function.
func MustRegisterMetricCollectors(metricCollectors ...prometheus.Collector) {
	insightsMetricsRegistry.MustRegister(metricCollectors...)
}

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading