Skip to content

pkg/helm/run.go,pkg/internal/scaffold: helm operator metrics #1482

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jun 5, 2019
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions hack/tests/e2e-helm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,20 @@ test_operator() {
exit 1
fi

# verify that metrics service was created
if ! timeout 20s bash -c -- "until kubectl get service/nginx-operator > /dev/null 2>&1; do sleep 1; done";
then
kubectl logs deployment/nginx-operator
exit 1
fi

# verify that the metrics endpoint exists
if ! timeout 1m bash -c -- "until kubectl run -it --rm --restart=Never test-metrics --image=registry.access.redhat.com/ubi7/ubi-minimal:latest -- curl -sfo /dev/null http://nginx-operator:8383/metrics; do sleep 1; done";
then
kubectl logs deployment/nginx-operator
exit 1
fi

# create CR
kubectl create -f deploy/crds/helm_v1alpha1_nginx_cr.yaml
trap_add 'kubectl delete --ignore-not-found -f ${OPERATORDIR}/deploy/crds/helm_v1alpha1_nginx_cr.yaml' EXIT
Expand Down Expand Up @@ -139,6 +153,7 @@ go mod vendor
echo "module github.com/operator-framework/operator-sdk" > $ROOTDIR/go.mod
trap_add 'rm $ROOTDIR/go.mod' EXIT
go mod edit -replace=github.com/operator-framework/operator-sdk=$ROOTDIR
go mod vendor

operator-sdk build "$DEST_IMAGE"

Expand Down
1 change: 1 addition & 0 deletions internal/pkg/scaffold/helm/go_mod.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ replace (
)

replace (
github.com/coreos/prometheus-operator => github.com/coreos/prometheus-operator v0.29.0
k8s.io/code-generator => k8s.io/code-generator v0.0.0-20181117043124-c2090bec4d9b
k8s.io/kube-openapi => k8s.io/kube-openapi v0.0.0-20180711000925-0cf8f7e6ed1d
)
Expand Down
2 changes: 0 additions & 2 deletions internal/pkg/scaffold/helm/role.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,6 @@ func CreateRoleScaffold(dc roleDiscoveryInterface, chart *chart.Chart) (*scaffol
roleScaffold := &scaffold.Role{
IsClusterScoped: false,
SkipDefaultRules: true,
// TODO: enable metrics in helm operator
SkipMetricsRules: true,
CustomRules: []rbacv1.PolicyRule{
// We need this rule so tiller can read namespaces to ensure they exist
{
Expand Down
7 changes: 1 addition & 6 deletions internal/pkg/scaffold/helm/role_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import (
"testing"

"github.com/operator-framework/operator-sdk/internal/pkg/scaffold/helm"

"github.com/stretchr/testify/assert"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/version"
Expand All @@ -43,23 +43,20 @@ func TestCreateRoleScaffold(t *testing.T) {
name: "fallback to default",
chart: failChart(),
expectSkipDefaultRules: false,
expectSkipMetricsRules: true,
expectIsClusterScoped: false,
expectLenCustomRules: 2,
},
{
name: "namespaced manifest",
chart: namespacedChart(),
expectSkipDefaultRules: true,
expectSkipMetricsRules: true,
expectIsClusterScoped: false,
expectLenCustomRules: 3,
},
{
name: "cluster scoped manifest",
chart: clusterScopedChart(),
expectSkipDefaultRules: true,
expectSkipMetricsRules: true,
expectIsClusterScoped: true,
expectLenCustomRules: 4,
},
Expand All @@ -77,7 +74,6 @@ func TestCreateRoleScaffold(t *testing.T) {
assert.NoError(t, err)
}
assert.Equal(t, tc.expectSkipDefaultRules, roleScaffold.SkipDefaultRules)
assert.Equal(t, tc.expectSkipMetricsRules, roleScaffold.SkipMetricsRules)
assert.Equal(t, tc.expectLenCustomRules, len(roleScaffold.CustomRules))
assert.Equal(t, tc.expectIsClusterScoped, roleScaffold.IsClusterScoped)
})
Expand Down Expand Up @@ -122,7 +118,6 @@ type roleScaffoldTestCase struct {
name string
chart *chart.Chart
expectSkipDefaultRules bool
expectSkipMetricsRules bool
expectIsClusterScoped bool
expectLenCustomRules int
expectErr bool
Expand Down
15 changes: 12 additions & 3 deletions internal/pkg/scaffold/role.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ type Role struct {

IsClusterScoped bool
SkipDefaultRules bool
SkipMetricsRules bool
CustomRules []rbacv1.PolicyRule
}

Expand Down Expand Up @@ -213,7 +212,6 @@ rules:
{{- end }}
{{- end }}
{{- end }}
{{- if not .SkipMetricsRules }}
- apiGroups:
- monitoring.coreos.com
resources:
Expand All @@ -229,5 +227,16 @@ rules:
- {{ .ProjectName }}
verbs:
- "update"
{{- end }}
- apiGroups:
- ""
resources:
- pods
verbs:
- get
- apiGroups:
- apps
resources:
- replicasets
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why replicasets here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is necessary for code that moves up the owner reference chain from the pod to figure out which resource to use for the metrics service owner reference. Since our default operator workload kind is Deployment, the code needs to be able to GET both pods and replicasets. Since we end up using the replicaset's owner reference, we don't actually need GET permission on deployments.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We already have pods and replicasets listed with the verb * above in Lines 164-184

- apiGroups:
  - ""
  resources:
  - pods
...
  verbs:
  - "*"
- apiGroups:
  - apps
  resources:
  - deployments
  - daemonsets
  - replicasets
  - statefulsets
  verbs:
  - "*"

Those are the general default rules and these ones are specific to allowing metrics to be exposed. Do you think it's worth adding a comment in the scaffold so it's obvious to the user why we've repeated the rules for pods and replicasets?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would be nice. Lili asked this same question when we introduced the initial helm role generation. I looked into it and it turns out it isn't straigtforward since we unmarshal and remarshal the YAML (thus losing the comments) to update the role for CRD APIs.

verbs:
- get
`
52 changes: 51 additions & 1 deletion internal/pkg/scaffold/role_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ func TestRoleCustomRules(t *testing.T) {
s, buf := setupScaffoldAndWriter()
err := s.Execute(appConfig, &Role{
SkipDefaultRules: true,
SkipMetricsRules: true,
CustomRules: []rbacv1.PolicyRule{
{
APIGroups: []string{"policy"},
Expand Down Expand Up @@ -116,6 +115,18 @@ rules:
- app-operator
verbs:
- "update"
- apiGroups:
- ""
resources:
- pods
verbs:
- get
- apiGroups:
- apps
resources:
- replicasets
verbs:
- get
`

const clusterroleExp = `kind: ClusterRole
Expand Down Expand Up @@ -159,6 +170,18 @@ rules:
- app-operator
verbs:
- "update"
- apiGroups:
- ""
resources:
- pods
verbs:
- get
- apiGroups:
- apps
resources:
- replicasets
verbs:
- get
`

const roleCustomRulesExp = `kind: Role
Expand All @@ -181,4 +204,31 @@ rules:
resources:
- "roles"
- "rolebindings"
- apiGroups:
- monitoring.coreos.com
resources:
- servicemonitors
verbs:
- "get"
- "create"
- apiGroups:
- apps
resources:
- deployments/finalizers
resourceNames:
- app-operator
verbs:
- "update"
- apiGroups:
- ""
resources:
- pods
verbs:
- get
- apiGroups:
- apps
resources:
- replicasets
verbs:
- get
`
20 changes: 18 additions & 2 deletions pkg/helm/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/operator-framework/operator-sdk/pkg/helm/watches"
"github.com/operator-framework/operator-sdk/pkg/k8sutil"
"github.com/operator-framework/operator-sdk/pkg/leader"
"github.com/operator-framework/operator-sdk/pkg/metrics"
sdkVersion "github.com/operator-framework/operator-sdk/version"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand All @@ -35,6 +36,11 @@ import (
"sigs.k8s.io/controller-runtime/pkg/runtime/signals"
)

var (
metricsHost = "0.0.0.0"
metricsPort int32 = 8383
)

var log = logf.Log.WithName("cmd")

func printVersion() {
Expand Down Expand Up @@ -67,7 +73,8 @@ func Run(flags *hoflags.HelmOperatorFlags) error {
return err
}
mgr, err := manager.New(cfg, manager.Options{
Namespace: namespace,
Namespace: namespace,
MetricsBindAddress: fmt.Sprintf("%s:%d", metricsHost, metricsPort),
})
if err != nil {
log.Error(err, "Failed to create a new manager.")
Expand Down Expand Up @@ -100,13 +107,22 @@ func Run(flags *hoflags.HelmOperatorFlags) error {
log.Error(err, "Failed to get operator name")
return err
}

ctx := context.TODO()

// Become the leader before proceeding
err = leader.Become(context.TODO(), operatorName+"-lock")
err = leader.Become(ctx, operatorName+"-lock")
if err != nil {
log.Error(err, "Failed to become leader.")
return err
}

// Create Service object to expose the metrics port.
_, err = metrics.ExposeMetricsPort(ctx, metricsPort)
if err != nil {
log.Info(err.Error())
}

// Start the Cmd
if err = mgr.Start(signals.SetupSignalHandler()); err != nil {
log.Error(err, "Manager exited non-zero.")
Expand Down