Skip to content

Commit 09ee0e3

Browse files
committed
Fix issues with oc adm migrate authorization
This change handles a number of bugs with the migrate authorization command: 1. Rework MigrateAuthorizationOptions.checkParity to be a MigrateActionFunc instead of a MigrateVisitFunc. This allows us to take advantage of migrate's default retry handling. 2. Add proper retry logic to checkParity, and have all check* funcs return the appropriate TemporaryError based on the situation. This coupled with migrate's existing retry logic makes the command resilient against common errors such as the deletion of resources. 3. Remove the binding of the standard migrate flags from migrate authorization. This command supports no parameters, and exposing the standard migrate parameters allows the user to accidentally break how the command runs. 4. Fix GroupVersion constants used for discovery based gating. They were incorrectly set to the internal version instead of v1. This would cause the policy based gating to always think that the server did not support policy objects. 5. Force RBAC client to use v1beta1 since that is the only version supported by a 3.6 server. This allows you to use a 3.9 client against a 3.6 server. 6. Remove rate limiting from the RBAC client to fix BZ 1513139. Only a cluster admin can interact with RBAC resources on a 3.6 server, so this will quickly error out if run by a non-privileged user. Bug 1513139 Signed-off-by: Monis Khan <[email protected]>
1 parent 4bb2151 commit 09ee0e3

File tree

4 files changed

+129
-75
lines changed

4 files changed

+129
-75
lines changed

pkg/cmd/util/clientcmd/gating.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import (
88
"k8s.io/apimachinery/pkg/runtime/schema"
99
"k8s.io/client-go/discovery"
1010

11-
"github.com/openshift/origin/pkg/authorization/apis/authorization"
11+
authorization "github.com/openshift/origin/pkg/authorization/apis/authorization/v1"
1212
)
1313

1414
// LegacyPolicyResourceGate returns err if the server does not support the set of legacy policy objects (< 3.7)

pkg/oc/admin/migrate/authorization/authorization.go

+117-65
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,10 @@ import (
55
"fmt"
66
"io"
77

8+
kerrs "k8s.io/apimachinery/pkg/api/errors"
89
"k8s.io/apimachinery/pkg/apis/meta/v1"
9-
"k8s.io/apimachinery/pkg/runtime"
10-
utilerrors "k8s.io/apimachinery/pkg/util/errors"
10+
"k8s.io/client-go/util/flowcontrol"
11+
"k8s.io/kubernetes/pkg/apis/rbac/v1beta1"
1112
rbacinternalversion "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/typed/rbac/internalversion"
1213
"k8s.io/kubernetes/pkg/kubectl/cmd/templates"
1314
kcmdutil "k8s.io/kubernetes/pkg/kubectl/cmd/util"
@@ -39,7 +40,8 @@ var (
3940
4041
No resources are mutated.`)
4142

42-
errOutOfSync = errors.New("is not in sync with RBAC")
43+
// errOutOfSync is retriable since it could be caused by the controller lagging behind
44+
errOutOfSync = migrate.ErrRetriable{errors.New("is not in sync with RBAC")}
4345
)
4446

4547
type MigrateAuthorizationOptions struct {
@@ -54,6 +56,7 @@ func NewCmdMigrateAuthorization(name, fullName string, f *clientcmd.Factory, in
5456
Out: out,
5557
ErrOut: errout,
5658
AllNamespaces: true,
59+
Confirm: true, // force our save function to always run (it is read only)
5760
Include: []string{
5861
"clusterroles.authorization.openshift.io",
5962
"roles.authorization.openshift.io",
@@ -81,20 +84,40 @@ func (o *MigrateAuthorizationOptions) Complete(name string, f *clientcmd.Factory
8184
return fmt.Errorf("%s takes no positional arguments", name)
8285
}
8386

87+
o.ResourceOptions.SaveFn = o.checkParity
8488
if err := o.ResourceOptions.Complete(f, c); err != nil {
8589
return err
8690
}
8791

88-
kclient, err := f.ClientSet()
92+
discovery, err := f.DiscoveryClient()
8993
if err != nil {
9094
return err
9195
}
9296

93-
if err := clientcmd.LegacyPolicyResourceGate(kclient.Discovery()); err != nil {
97+
if err := clientcmd.LegacyPolicyResourceGate(discovery); err != nil {
9498
return err
9599
}
96100

97-
o.rbac = kclient.Rbac()
101+
config, err := f.ClientConfig()
102+
if err != nil {
103+
return err
104+
}
105+
106+
// do not rate limit this client because it has to scan all RBAC data across the cluster
107+
// this is safe because only a cluster admin will have the ability to read these objects
108+
configShallowCopy := *config
109+
configShallowCopy.RateLimiter = flowcontrol.NewFakeAlwaysRateLimiter()
110+
111+
// This command is only compatible with a 3.6 server, which only supported RBAC v1beta1
112+
// Thus we must force that GV otherwise the client will default to v1
113+
gv := v1beta1.SchemeGroupVersion
114+
configShallowCopy.GroupVersion = &gv
115+
116+
rbac, err := rbacinternalversion.NewForConfig(&configShallowCopy)
117+
if err != nil {
118+
return err
119+
}
120+
o.rbac = rbac
98121

99122
return nil
100123
}
@@ -104,130 +127,159 @@ func (o MigrateAuthorizationOptions) Validate() error {
104127
}
105128

106129
func (o MigrateAuthorizationOptions) Run() error {
107-
return o.ResourceOptions.Visitor().Visit(func(info *resource.Info) (migrate.Reporter, error) {
108-
return o.checkParity(info.Object)
109-
})
130+
// we lie and say this object has changed so our save function will run
131+
return o.ResourceOptions.Visitor().Visit(migrate.AlwaysRequiresMigration)
110132
}
111133

112134
// checkParity confirms that Openshift authorization objects are in sync with Kubernetes RBAC
113135
// and returns an error if they are out of sync or if it encounters a conversion error
114-
func (o *MigrateAuthorizationOptions) checkParity(obj runtime.Object) (migrate.Reporter, error) {
115-
var errlist []error
116-
switch t := obj.(type) {
136+
func (o *MigrateAuthorizationOptions) checkParity(info *resource.Info, _ migrate.Reporter) error {
137+
var err migrate.TemporaryError
138+
139+
switch t := info.Object.(type) {
117140
case *authorizationapi.ClusterRole:
118-
errlist = append(errlist, o.checkClusterRole(t)...)
141+
err = o.checkClusterRole(t)
119142
case *authorizationapi.Role:
120-
errlist = append(errlist, o.checkRole(t)...)
143+
err = o.checkRole(t)
121144
case *authorizationapi.ClusterRoleBinding:
122-
errlist = append(errlist, o.checkClusterRoleBinding(t)...)
145+
err = o.checkClusterRoleBinding(t)
123146
case *authorizationapi.RoleBinding:
124-
errlist = append(errlist, o.checkRoleBinding(t)...)
147+
err = o.checkRoleBinding(t)
125148
default:
126-
return nil, nil // indicate that we ignored the object
149+
// this should never happen unless o.Include or the server is broken
150+
return fmt.Errorf("impossible type %T for checkParity info=%#v object=%#v", t, info, t)
151+
}
152+
153+
// We encountered no error, so this object is in sync.
154+
if err == nil {
155+
// we only perform read operations so we return this error to signal that we did not change anything
156+
return migrate.ErrUnchanged
157+
}
158+
159+
// At this point we know that we have some non-nil TemporaryError.
160+
// If it has the possibility of being transient, we need to sync ourselves with the current state of the object.
161+
if err.Temporary() {
162+
// The most likely cause is that an authorization object was deleted after we initially fetched it,
163+
// and the controller deleted the associated RBAC object, which caused our RBAC GET to fail.
164+
// We can confirm this by refetching the authorization object.
165+
refreshErr := info.Get()
166+
if refreshErr != nil {
167+
// Our refresh GET for this authorization object failed.
168+
// The default logic for migration errors is appropriate in this case (refreshErr is most likely a NotFound).
169+
return migrate.DefaultRetriable(info, refreshErr)
170+
}
171+
// We had no refreshErr, but encountered some other possibly transient error.
172+
// No special handling is required in this case, we just pass it through below.
127173
}
128-
return migrate.NotChanged, utilerrors.NewAggregate(errlist) // we only perform read operations
174+
175+
// All of the check* funcs return an appropriate TemporaryError based on the failure,
176+
// so we can pass that through to the default migration logic which will retry as needed.
177+
return err
129178
}
130179

131-
func (o *MigrateAuthorizationOptions) checkClusterRole(originClusterRole *authorizationapi.ClusterRole) []error {
132-
var errlist []error
180+
// handleRBACGetError signals for a retry on NotFound (handles deletion and sync lag)
181+
// and ServerTimeout (handles heavy load against the server).
182+
func handleRBACGetError(err error) migrate.TemporaryError {
183+
switch {
184+
case kerrs.IsNotFound(err), kerrs.IsServerTimeout(err):
185+
return migrate.ErrRetriable{err}
186+
default:
187+
return migrate.ErrNotRetriable{err}
188+
}
189+
}
133190

191+
func (o *MigrateAuthorizationOptions) checkClusterRole(originClusterRole *authorizationapi.ClusterRole) migrate.TemporaryError {
134192
// convert the origin role to a rbac role
135193
convertedClusterRole, err := util.ConvertToRBACClusterRole(originClusterRole)
136194
if err != nil {
137-
errlist = append(errlist, err)
195+
// conversion errors should basically never happen, so we do not attempt to retry on those
196+
return migrate.ErrNotRetriable{err}
138197
}
139198

140199
// try to get the equivalent rbac role from the api
141200
rbacClusterRole, err := o.rbac.ClusterRoles().Get(originClusterRole.Name, v1.GetOptions{})
142201
if err != nil {
143-
errlist = append(errlist, err)
202+
// it is possible that the controller has not synced this yet
203+
return handleRBACGetError(err)
144204
}
145205

146-
// compare the results if there have been no errors so far
147-
if len(errlist) == 0 {
148-
// if they are not equal, something has gone wrong and the two objects are not in sync
149-
if util.PrepareForUpdateClusterRole(convertedClusterRole, rbacClusterRole) {
150-
errlist = append(errlist, errOutOfSync)
151-
}
206+
// if they are not equal, something has gone wrong and the two objects are not in sync
207+
if util.PrepareForUpdateClusterRole(convertedClusterRole, rbacClusterRole) {
208+
// we retry on this since it could be caused by the controller lagging behind
209+
return errOutOfSync
152210
}
153211

154-
return errlist
212+
return nil
155213
}
156214

157-
func (o *MigrateAuthorizationOptions) checkRole(originRole *authorizationapi.Role) []error {
158-
var errlist []error
159-
215+
func (o *MigrateAuthorizationOptions) checkRole(originRole *authorizationapi.Role) migrate.TemporaryError {
160216
// convert the origin role to a rbac role
161217
convertedRole, err := util.ConvertToRBACRole(originRole)
162218
if err != nil {
163-
errlist = append(errlist, err)
219+
// conversion errors should basically never happen, so we do not attempt to retry on those
220+
return migrate.ErrNotRetriable{err}
164221
}
165222

166223
// try to get the equivalent rbac role from the api
167224
rbacRole, err := o.rbac.Roles(originRole.Namespace).Get(originRole.Name, v1.GetOptions{})
168225
if err != nil {
169-
errlist = append(errlist, err)
226+
// it is possible that the controller has not synced this yet
227+
return handleRBACGetError(err)
170228
}
171229

172-
// compare the results if there have been no errors so far
173-
if len(errlist) == 0 {
174-
// if they are not equal, something has gone wrong and the two objects are not in sync
175-
if util.PrepareForUpdateRole(convertedRole, rbacRole) {
176-
errlist = append(errlist, errOutOfSync)
177-
}
230+
// if they are not equal, something has gone wrong and the two objects are not in sync
231+
if util.PrepareForUpdateRole(convertedRole, rbacRole) {
232+
// we retry on this since it could be caused by the controller lagging behind
233+
return errOutOfSync
178234
}
179235

180-
return errlist
236+
return nil
181237
}
182238

183-
func (o *MigrateAuthorizationOptions) checkClusterRoleBinding(originRoleBinding *authorizationapi.ClusterRoleBinding) []error {
184-
var errlist []error
185-
239+
func (o *MigrateAuthorizationOptions) checkClusterRoleBinding(originRoleBinding *authorizationapi.ClusterRoleBinding) migrate.TemporaryError {
186240
// convert the origin role binding to a rbac role binding
187241
convertedRoleBinding, err := util.ConvertToRBACClusterRoleBinding(originRoleBinding)
188242
if err != nil {
189-
errlist = append(errlist, err)
243+
// conversion errors should basically never happen, so we do not attempt to retry on those
244+
return migrate.ErrNotRetriable{err}
190245
}
191246

192247
// try to get the equivalent rbac role binding from the api
193248
rbacRoleBinding, err := o.rbac.ClusterRoleBindings().Get(originRoleBinding.Name, v1.GetOptions{})
194249
if err != nil {
195-
errlist = append(errlist, err)
250+
// it is possible that the controller has not synced this yet
251+
return handleRBACGetError(err)
196252
}
197253

198-
// compare the results if there have been no errors so far
199-
if len(errlist) == 0 {
200-
// if they are not equal, something has gone wrong and the two objects are not in sync
201-
if util.PrepareForUpdateClusterRoleBinding(convertedRoleBinding, rbacRoleBinding) {
202-
errlist = append(errlist, errOutOfSync)
203-
}
254+
// if they are not equal, something has gone wrong and the two objects are not in sync
255+
if util.PrepareForUpdateClusterRoleBinding(convertedRoleBinding, rbacRoleBinding) {
256+
// we retry on this since it could be caused by the controller lagging behind
257+
return errOutOfSync
204258
}
205259

206-
return errlist
260+
return nil
207261
}
208262

209-
func (o *MigrateAuthorizationOptions) checkRoleBinding(originRoleBinding *authorizationapi.RoleBinding) []error {
210-
var errlist []error
211-
263+
func (o *MigrateAuthorizationOptions) checkRoleBinding(originRoleBinding *authorizationapi.RoleBinding) migrate.TemporaryError {
212264
// convert the origin role binding to a rbac role binding
213265
convertedRoleBinding, err := util.ConvertToRBACRoleBinding(originRoleBinding)
214266
if err != nil {
215-
errlist = append(errlist, err)
267+
// conversion errors should basically never happen, so we do not attempt to retry on those
268+
return migrate.ErrNotRetriable{err}
216269
}
217270

218271
// try to get the equivalent rbac role binding from the api
219272
rbacRoleBinding, err := o.rbac.RoleBindings(originRoleBinding.Namespace).Get(originRoleBinding.Name, v1.GetOptions{})
220273
if err != nil {
221-
errlist = append(errlist, err)
274+
// it is possible that the controller has not synced this yet
275+
return handleRBACGetError(err)
222276
}
223277

224-
// compare the results if there have been no errors so far
225-
if len(errlist) == 0 {
226-
// if they are not equal, something has gone wrong and the two objects are not in sync
227-
if util.PrepareForUpdateRoleBinding(convertedRoleBinding, rbacRoleBinding) {
228-
errlist = append(errlist, errOutOfSync)
229-
}
278+
// if they are not equal, something has gone wrong and the two objects are not in sync
279+
if util.PrepareForUpdateRoleBinding(convertedRoleBinding, rbacRoleBinding) {
280+
// we retry on this since it could be caused by the controller lagging behind
281+
return errOutOfSync
230282
}
231283

232-
return errlist
284+
return nil
233285
}

pkg/oc/admin/migrate/migrator.go

+9-7
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,6 @@ func timeStampNow() string {
5555
return time.Now().Format("0102 15:04:05.000000")
5656
}
5757

58-
// NotChanged is a Reporter returned by operations that are guaranteed to be read-only
59-
var NotChanged = ReporterBool(false)
60-
6158
// ResourceOptions assists in performing migrations on any object that
6259
// can be retrieved via the API.
6360
type ResourceOptions struct {
@@ -351,23 +348,28 @@ var ErrUnchanged = fmt.Errorf("migration was not necessary")
351348
// both status and spec must be changed).
352349
var ErrRecalculate = fmt.Errorf("recalculate migration")
353350

351+
// MigrateError is an exported alias to error to allow external packages to use ErrRetriable and ErrNotRetriable
352+
type MigrateError error
353+
354354
// ErrRetriable is a wrapper for an error that a migrator may use to indicate the
355355
// specific error can be retried.
356356
type ErrRetriable struct {
357-
error
357+
MigrateError
358358
}
359359

360360
func (ErrRetriable) Temporary() bool { return true }
361361

362362
// ErrNotRetriable is a wrapper for an error that a migrator may use to indicate the
363363
// specific error cannot be retried.
364364
type ErrNotRetriable struct {
365-
error
365+
MigrateError
366366
}
367367

368368
func (ErrNotRetriable) Temporary() bool { return false }
369369

370-
type temporary interface {
370+
// TemporaryError is a wrapper interface that is used to determine if an error can be retried.
371+
type TemporaryError interface {
372+
error
371373
// Temporary should return true if this is a temporary error
372374
Temporary() bool
373375
}
@@ -479,7 +481,7 @@ func (t *migrateTracker) try(info *resource.Info, retries int) (attemptResult, e
479481

480482
// canRetry returns true if the provided error indicates a retry is possible.
481483
func canRetry(err error) bool {
482-
if temp, ok := err.(temporary); ok && temp.Temporary() {
484+
if temp, ok := err.(TemporaryError); ok && temp.Temporary() {
483485
return true
484486
}
485487
return err == ErrRecalculate

pkg/oc/cli/cmd/create/policy_binding.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -72,12 +72,12 @@ func (o *CreatePolicyBindingOptions) Complete(cmd *cobra.Command, f *clientcmd.F
7272
}
7373
o.BindingNamespace = namespace
7474

75-
kclient, err := f.ClientSet()
75+
discovery, err := f.DiscoveryClient()
7676
if err != nil {
7777
return err
7878
}
7979

80-
if err := clientcmd.LegacyPolicyResourceGate(kclient.Discovery()); err != nil {
80+
if err := clientcmd.LegacyPolicyResourceGate(discovery); err != nil {
8181
return err
8282
}
8383
client, err := f.OpenshiftInternalAuthorizationClient()

0 commit comments

Comments
 (0)