Skip to content

Commit 9116c65

Browse files
committed
OCPBUGS-17035: fix KRP permissions for Thanos Querier
fix KRP configuration for thanos-querier in order to enforce restricted access only to entities allowed to [http-verb] for pods.metrics.k8s.io Signed-off-by: Pranshu Srivastava <[email protected]>
1 parent 5a4cfa9 commit 9116c65

File tree

5 files changed

+244
-21
lines changed

5 files changed

+244
-21
lines changed

assets/thanos-querier/deployment.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ spec:
140140
- mountPath: /etc/proxy/secrets
141141
name: secret-thanos-querier-oauth-cookie
142142
- args:
143+
- --v=12
143144
- --secure-listen-address=0.0.0.0:9092
144145
- --upstream=http://127.0.0.1:9095
145146
- --config-file=/etc/kube-rbac-proxy/config.yaml

assets/thanos-querier/kube-rbac-proxy-secret.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ stringData:
1515
config.yaml: |-
1616
"authorization":
1717
"resourceAttributes":
18-
"apiVersion": "metrics.k8s.io/v1beta1"
18+
"apiGroup": "metrics.k8s.io"
19+
"apiVersion": "v1beta1"
1920
"namespace": "{{ .Value }}"
2021
"resource": "pods"
2122
"rewrites":

jsonnet/components/thanos-querier.libsonnet

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,8 @@ function(params)
144144
},
145145
},
146146
resourceAttributes: {
147-
apiVersion: 'metrics.k8s.io/v1beta1',
147+
apiVersion: 'v1beta1',
148+
apiGroup: 'metrics.k8s.io',
148149
resource: 'pods',
149150
namespace: '{{ .Value }}',
150151
},
@@ -465,6 +466,7 @@ function(params)
465466
},
466467
],
467468
args: [
469+
'--v=12',
468470
'--secure-listen-address=0.0.0.0:9092',
469471
'--upstream=http://127.0.0.1:9095',
470472
'--config-file=/etc/kube-rbac-proxy/config.yaml',

test/e2e/framework/framework.go

Lines changed: 88 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ type Framework struct {
8282

8383
// New returns a new cluster monitoring operator end-to-end test framework and
8484
// triggers all the setup logic.
85-
func New(kubeConfigPath string) (*Framework, cleanUpFunc, error) {
85+
func New(kubeConfigPath string) (*Framework, CleanUpFunc, error) {
8686
ctx := context.Background()
8787
config, err := clientcmd.BuildConfigFromFlags("", kubeConfigPath)
8888
if err != nil {
@@ -218,11 +218,11 @@ func New(kubeConfigPath string) (*Framework, cleanUpFunc, error) {
218218
return f, cleanUp, nil
219219
}
220220

221-
type cleanUpFunc func() error
221+
type CleanUpFunc func() error
222222

223223
// setup creates everything necessary to use the test framework.
224-
func (f *Framework) setup() (cleanUpFunc, error) {
225-
cleanUpFuncs := []cleanUpFunc{}
224+
func (f *Framework) setup() (CleanUpFunc, error) {
225+
cleanUpFuncs := []CleanUpFunc{}
226226

227227
cf, err := f.CreateServiceAccount(f.Ns, E2eServiceAccount)
228228
if err != nil {
@@ -273,7 +273,7 @@ func (f *Framework) setup() (cleanUpFunc, error) {
273273
}, nil
274274
}
275275

276-
func (f *Framework) CreateServiceAccount(namespace, serviceAccount string) (cleanUpFunc, error) {
276+
func (f *Framework) CreateServiceAccount(namespace, serviceAccount string) (CleanUpFunc, error) {
277277
ctx := context.Background()
278278
sa := &v1.ServiceAccount{
279279
ObjectMeta: metav1.ObjectMeta{
@@ -334,7 +334,7 @@ func (f *Framework) GetLogs(namespace string, podName, containerName string) (st
334334
return string(logs), err
335335
}
336336

337-
func (f *Framework) CreateClusterRoleBinding(namespace, serviceAccount, clusterRole string) (cleanUpFunc, error) {
337+
func (f *Framework) CreateClusterRoleBinding(namespace, serviceAccount, clusterRole string) (CleanUpFunc, error) {
338338
ctx := context.Background()
339339
clusterRoleBinding := &rbacv1.ClusterRoleBinding{
340340
ObjectMeta: metav1.ObjectMeta{
@@ -367,7 +367,87 @@ func (f *Framework) CreateClusterRoleBinding(namespace, serviceAccount, clusterR
367367
}, nil
368368
}
369369

370-
func (f *Framework) CreateRoleBindingFromClusterRole(namespace, serviceAccount, clusterRole string) (cleanUpFunc, error) {
370+
func (f *Framework) CreateRoleBindingFromTypedRole(namespace, serviceAccount string, typedRole *rbacv1.Role) (CleanUpFunc, error) {
371+
ctx := context.Background()
372+
373+
role, err := f.KubeClient.RbacV1().Roles(namespace).Create(ctx, typedRole, metav1.CreateOptions{})
374+
if err != nil {
375+
if apierrors.IsAlreadyExists(err) {
376+
return nil, errors.Errorf("role %s already exists", typedRole.Name)
377+
}
378+
return nil, err
379+
}
380+
381+
roleBinding := &rbacv1.RoleBinding{
382+
ObjectMeta: metav1.ObjectMeta{
383+
Name: fmt.Sprintf("%s-%s", serviceAccount, typedRole.Name),
384+
Labels: map[string]string{
385+
E2eTestLabelName: E2eTestLabelValue,
386+
},
387+
},
388+
Subjects: []rbacv1.Subject{
389+
{
390+
Kind: "ServiceAccount",
391+
Name: serviceAccount,
392+
Namespace: namespace,
393+
},
394+
},
395+
RoleRef: rbacv1.RoleRef{
396+
Kind: "Role",
397+
Name: role.Name,
398+
APIGroup: "rbac.authorization.k8s.io",
399+
},
400+
}
401+
402+
roleBinding, err = f.KubeClient.RbacV1().RoleBindings(namespace).Create(ctx, roleBinding, metav1.CreateOptions{})
403+
if err != nil {
404+
if apierrors.IsAlreadyExists(err) {
405+
return nil, errors.Errorf("%s %s already exists", roleBinding.GroupVersionKind(), roleBinding.Name)
406+
}
407+
return nil, err
408+
}
409+
410+
// Wait for the role and role binding to be ready.
411+
err = Poll(10*time.Second, time.Minute, func() error {
412+
_, err := f.KubeClient.RbacV1().Roles(namespace).Get(ctx, role.Name, metav1.GetOptions{})
413+
if err != nil {
414+
return err
415+
}
416+
_, err = f.KubeClient.RbacV1().RoleBindings(namespace).Get(ctx, roleBinding.Name, metav1.GetOptions{})
417+
if err != nil {
418+
return err
419+
}
420+
return nil
421+
})
422+
423+
return func() error {
424+
err := f.KubeClient.RbacV1().Roles(namespace).Delete(ctx, role.Name, metav1.DeleteOptions{})
425+
if err != nil {
426+
return err
427+
}
428+
err = f.KubeClient.RbacV1().RoleBindings(namespace).Delete(ctx, roleBinding.Name, metav1.DeleteOptions{})
429+
if err != nil {
430+
return err
431+
}
432+
433+
// Wait for the role and role binding to be deleted.
434+
err = Poll(10*time.Second, time.Minute, func() error {
435+
_, err := f.KubeClient.RbacV1().Roles(namespace).Get(ctx, role.Name, metav1.GetOptions{})
436+
if err == nil {
437+
return errors.Errorf("%s %s still exists", role.GroupVersionKind(), role.Name)
438+
}
439+
_, err = f.KubeClient.RbacV1().RoleBindings(namespace).Get(ctx, roleBinding.Name, metav1.GetOptions{})
440+
if err == nil {
441+
return errors.Errorf("%s %s still exists", roleBinding.GroupVersionKind(), roleBinding.Name)
442+
}
443+
return nil
444+
})
445+
446+
return err
447+
}, nil
448+
}
449+
450+
func (f *Framework) CreateRoleBindingFromClusterRole(namespace, serviceAccount, clusterRole string) (CleanUpFunc, error) {
371451
ctx := context.Background()
372452
roleBinding := &rbacv1.RoleBinding{
373453
ObjectMeta: metav1.ObjectMeta{
@@ -400,7 +480,7 @@ func (f *Framework) CreateRoleBindingFromClusterRole(namespace, serviceAccount,
400480
}, nil
401481
}
402482

403-
func (f *Framework) CreateRoleBindingFromRole(namespace, serviceAccount, role string) (cleanUpFunc, error) {
483+
func (f *Framework) CreateRoleBindingFromRole(namespace, serviceAccount, role string) (CleanUpFunc, error) {
404484
ctx := context.Background()
405485
roleBinding := &rbacv1.RoleBinding{
406486
ObjectMeta: metav1.ObjectMeta{

test/e2e/user_workload_monitoring_test.go

Lines changed: 150 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,17 @@ import (
2626
"time"
2727

2828
"github.com/Jeffail/gabs"
29-
"github.com/openshift/cluster-monitoring-operator/test/e2e/framework"
3029
"github.com/pkg/errors"
3130
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
3231
appsv1 "k8s.io/api/apps/v1"
3332
v1 "k8s.io/api/core/v1"
33+
rbacv1 "k8s.io/api/rbac/v1"
3434
apierrors "k8s.io/apimachinery/pkg/api/errors"
3535
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3636
"k8s.io/apimachinery/pkg/util/wait"
3737
"k8s.io/client-go/util/cert"
38+
39+
"github.com/openshift/cluster-monitoring-operator/test/e2e/framework"
3840
)
3941

4042
type scenario struct {
@@ -525,16 +527,10 @@ func assertTenancyForMetrics(t *testing.T) {
525527

526528
err := framework.Poll(2*time.Second, 10*time.Second, func() error {
527529
_, err := f.CreateServiceAccount(userWorkloadTestNs, testAccount)
528-
return err
529-
})
530-
if err != nil {
531-
t.Fatal(err)
532-
}
533-
534-
// Grant enough permissions to the account so it can read metrics.
535-
err = framework.Poll(2*time.Second, 10*time.Second, func() error {
536-
_, err = f.CreateRoleBindingFromClusterRole(userWorkloadTestNs, testAccount, "admin")
537-
return err
530+
if !apierrors.IsAlreadyExists(err) {
531+
return err
532+
}
533+
return nil
538534
})
539535
if err != nil {
540536
t.Fatal(err)
@@ -576,6 +572,32 @@ func assertTenancyForMetrics(t *testing.T) {
576572
t.Run(tc.name, func(t *testing.T) {
577573
t.Logf("Running query %q", tc.query)
578574

575+
var cleanupFn func() error
576+
// Grant just-enough permissions to the account so it can read metrics.
577+
err = framework.Poll(2*time.Second, 10*time.Second, func() error {
578+
cleanupFn, err = f.CreateRoleBindingFromTypedRole(userWorkloadTestNs, testAccount, &rbacv1.Role{
579+
ObjectMeta: metav1.ObjectMeta{
580+
Name: "tenancy-test-metrics",
581+
},
582+
Rules: []rbacv1.PolicyRule{
583+
{
584+
APIGroups: []string{"metrics.k8s.io"},
585+
Resources: []string{"pods"},
586+
Verbs: []string{"get"},
587+
},
588+
},
589+
})
590+
return err
591+
})
592+
if err != nil {
593+
t.Fatal(err)
594+
}
595+
defer func() {
596+
if err := cleanupFn(); err != nil {
597+
t.Fatal(err)
598+
}
599+
}()
600+
579601
err = framework.Poll(5*time.Second, time.Minute, func() error {
580602
// The tenancy port (9092) is only exposed in-cluster so we need to use
581603
// port forwarding to access kube-rbac-proxy.
@@ -681,6 +703,123 @@ func assertTenancyForMetrics(t *testing.T) {
681703
if err != nil {
682704
t.Fatalf("the account has access to the rules endpoint of Thanos querier: %v", err)
683705
}
706+
707+
for _, tc := range []struct {
708+
// NOTE: role verbs need to factor in the permissions metrics-server requires for the respective use-case.
709+
role rbacv1.Role
710+
expectNotOKOnQuery bool
711+
desc string
712+
method string
713+
}{
714+
715+
{
716+
role: rbacv1.Role{
717+
ObjectMeta: metav1.ObjectMeta{
718+
Name: "tenancy-test-metrics",
719+
},
720+
Rules: []rbacv1.PolicyRule{
721+
{
722+
APIGroups: []string{"metrics.k8s.io"},
723+
Resources: []string{"pods"},
724+
Verbs: []string{"get"},
725+
},
726+
},
727+
},
728+
method: http.MethodPost,
729+
expectNotOKOnQuery: true,
730+
desc: "should disallow POST queries to the endpoint for SA with no create permission",
731+
},
732+
{
733+
role: rbacv1.Role{
734+
ObjectMeta: metav1.ObjectMeta{
735+
Name: "tenancy-test-metrics",
736+
},
737+
Rules: []rbacv1.PolicyRule{
738+
{
739+
APIGroups: []string{"metrics.k8s.io"},
740+
Resources: []string{"pods"},
741+
Verbs: []string{"get"},
742+
},
743+
},
744+
},
745+
method: http.MethodGet,
746+
desc: "should allow GET queries to the endpoint for SA with get permission",
747+
},
748+
{
749+
role: rbacv1.Role{
750+
ObjectMeta: metav1.ObjectMeta{
751+
Name: "tenancy-test-metrics",
752+
},
753+
Rules: []rbacv1.PolicyRule{
754+
{
755+
APIGroups: []string{"metrics.k8s.io"},
756+
Resources: []string{"pods"},
757+
Verbs: []string{"get", "create"},
758+
},
759+
},
760+
},
761+
method: http.MethodPost,
762+
desc: "should allow POST queries to the endpoint for SA with get and create permission",
763+
},
764+
} {
765+
t.Run(tc.desc, func(t *testing.T) {
766+
var cleanupFn framework.CleanUpFunc
767+
768+
// Create a role binding for the test SA.
769+
err = framework.Poll(time.Second, time.Minute, func() error {
770+
cleanupFn, err = f.CreateRoleBindingFromTypedRole(userWorkloadTestNs, testAccount, &tc.role)
771+
return err
772+
})
773+
if err != nil {
774+
t.Fatal(err)
775+
}
776+
777+
// Remove associated artifacts.
778+
defer func() {
779+
if err := cleanupFn(); err != nil {
780+
t.Fatal(err)
781+
}
782+
}()
783+
784+
// Forward the tenancy port.
785+
host, cleanUp, err := f.ForwardPort(t, f.Ns, "thanos-querier", 9092)
786+
if err != nil {
787+
t.Fatal(err)
788+
}
789+
defer cleanUp()
790+
791+
// Create a Prometheus client with the test SA token.
792+
client := framework.NewPrometheusClient(
793+
host,
794+
token,
795+
&framework.QueryParameterInjector{
796+
Name: "namespace",
797+
Value: userWorkloadTestNs,
798+
},
799+
)
800+
801+
resp, err := client.Do(tc.method, "/api/v1/query?namespace="+userWorkloadTestNs+"&query=up", nil)
802+
if err != nil {
803+
t.Fatal(err)
804+
}
805+
defer resp.Body.Close()
806+
// Body: {"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"up",...},"value":[1695582946.784,"1"]}]}}
807+
respBodyBytes, err := io.ReadAll(resp.Body)
808+
if err != nil {
809+
t.Fatal(err)
810+
}
811+
812+
if tc.expectNotOKOnQuery {
813+
if resp.StatusCode == http.StatusOK {
814+
t.Fatal("expected request to be rejected, but succeeded")
815+
}
816+
} else {
817+
if resp.StatusCode != http.StatusOK {
818+
t.Fatalf("expected request to be accepted, but got status code %d (%s)", resp.StatusCode, respBodyBytes)
819+
}
820+
}
821+
})
822+
}
684823
}
685824

686825
// assertTenancyForRules ensures that a tenant can access rules from her namespace (and only from this one).

0 commit comments

Comments
 (0)