Skip to content

Commit c29863b

Browse files
authored
Prevent OLM from creating InstallPlans when bundle unpack fails (#2942)
* Changes how `InstallPlan`s are being created Prevent OLM from creating `InstallPlan`s when bundle unpack fails Signed-off-by: Mikalai Radchuk <[email protected]> * Updates unit tests for syncResolvingNamespace Tests now include handling of unpacking errors Signed-off-by: Mikalai Radchuk <[email protected]> * Renames test data files Signed-off-by: Mikalai Radchuk <[email protected]> * Updates E2E tests Changes required to account for a new flow where we prevent `InstallPlan` from being created when unpack job fails Signed-off-by: Mikalai Radchuk <[email protected]> --------- Signed-off-by: Mikalai Radchuk <[email protected]>
1 parent 3ee218b commit c29863b

12 files changed

+695
-538
lines changed

pkg/controller/bundle/bundle_unpacker.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ import (
3535

3636
const (
3737
// TODO: This can be a spec field
38-
// BundleUnpackTimeoutAnnotationKey allows setting a bundle unpack timeout per InstallPlan
38+
// BundleUnpackTimeoutAnnotationKey allows setting a bundle unpack timeout per OperatorGroup
3939
// and overrides the default specified by the --bundle-unpack-timeout flag
4040
// The time duration should be in the same format as accepted by time.ParseDuration()
4141
// e.g 1m30s

pkg/controller/operators/catalog/operator.go

+115-104
Original file line numberDiff line numberDiff line change
@@ -934,7 +934,13 @@ func (o *Operator) syncResolvingNamespace(obj interface{}) error {
934934
return err
935935
}
936936

937-
failForwardEnabled, err := resolver.IsFailForwardEnabled(o.lister.OperatorsV1().OperatorGroupLister().OperatorGroups(namespace))
937+
ogLister := o.lister.OperatorsV1().OperatorGroupLister().OperatorGroups(namespace)
938+
failForwardEnabled, err := resolver.IsFailForwardEnabled(ogLister)
939+
if err != nil {
940+
return err
941+
}
942+
943+
unpackTimeout, err := bundle.OperatorGroupBundleUnpackTimeout(ogLister)
938944
if err != nil {
939945
return err
940946
}
@@ -1028,6 +1034,80 @@ func (o *Operator) syncResolvingNamespace(obj interface{}) error {
10281034
return err
10291035
}
10301036

1037+
// Attempt to unpack bundles before installing
1038+
// Note: This should probably use the attenuated client to prevent users from resolving resources they otherwise don't have access to.
1039+
if len(bundleLookups) > 0 {
1040+
logger.Debug("unpacking bundles")
1041+
1042+
var unpacked bool
1043+
unpacked, steps, bundleLookups, err = o.unpackBundles(namespace, steps, bundleLookups, unpackTimeout)
1044+
if err != nil {
1045+
// If the error was fatal capture and fail
1046+
if olmerrors.IsFatal(err) {
1047+
_, updateErr := o.updateSubscriptionStatuses(
1048+
o.setSubsCond(subs, v1alpha1.SubscriptionCondition{
1049+
Type: v1alpha1.SubscriptionBundleUnpackFailed,
1050+
Reason: "ErrorPreventedUnpacking",
1051+
Message: err.Error(),
1052+
Status: corev1.ConditionTrue,
1053+
}))
1054+
if updateErr != nil {
1055+
logger.WithError(updateErr).Debug("failed to update subs conditions")
1056+
return updateErr
1057+
}
1058+
return nil
1059+
}
1060+
// Retry sync if non-fatal error
1061+
return fmt.Errorf("bundle unpacking failed with an error: %w", err)
1062+
}
1063+
1064+
// Check BundleLookup status conditions to see if the BundleLookupFailed condition is true
1065+
// which means bundle lookup has failed and subscriptions need to be updated
1066+
// with a condition indicating the failure.
1067+
isFailed, cond := hasBundleLookupFailureCondition(bundleLookups)
1068+
if isFailed {
1069+
err := fmt.Errorf("bundle unpacking failed. Reason: %v, and Message: %v", cond.Reason, cond.Message)
1070+
logger.Infof("%v", err)
1071+
1072+
_, updateErr := o.updateSubscriptionStatuses(
1073+
o.setSubsCond(subs, v1alpha1.SubscriptionCondition{
1074+
Type: v1alpha1.SubscriptionBundleUnpackFailed,
1075+
Reason: "BundleUnpackFailed",
1076+
Message: err.Error(),
1077+
Status: corev1.ConditionTrue,
1078+
}))
1079+
if updateErr != nil {
1080+
logger.WithError(updateErr).Debug("failed to update subs conditions")
1081+
return updateErr
1082+
}
1083+
// Since this likely requires intervention we do not want to
1084+
// requeue too often. We return no error here and rely on a
1085+
// periodic resync which will help to automatically resolve
1086+
// some issues such as unreachable bundle images caused by
1087+
// bad catalog updates.
1088+
return nil
1089+
}
1090+
1091+
// This means that the unpack job is still running (most likely) or
1092+
// there was some issue which we did not handle above.
1093+
if !unpacked {
1094+
_, updateErr := o.updateSubscriptionStatuses(
1095+
o.setSubsCond(subs, v1alpha1.SubscriptionCondition{
1096+
Type: v1alpha1.SubscriptionBundleUnpacking,
1097+
Reason: "UnpackingInProgress",
1098+
Status: corev1.ConditionTrue,
1099+
}))
1100+
if updateErr != nil {
1101+
logger.WithError(updateErr).Debug("failed to update subs conditions")
1102+
return updateErr
1103+
}
1104+
1105+
logger.Debug("unpacking is not complete yet, requeueing")
1106+
o.nsResolveQueue.AddAfter(namespace, 5*time.Second)
1107+
return nil
1108+
}
1109+
}
1110+
10311111
// create installplan if anything updated
10321112
if len(updatedSubs) > 0 {
10331113
logger.Debug("resolution caused subscription changes, creating installplan")
@@ -1062,8 +1142,17 @@ func (o *Operator) syncResolvingNamespace(obj interface{}) error {
10621142
logger.Debugf("no subscriptions were updated")
10631143
}
10641144

1145+
// Make sure that we no longer indicate unpacking progress
1146+
subs = o.setSubsCond(subs, v1alpha1.SubscriptionCondition{
1147+
Type: v1alpha1.SubscriptionBundleUnpacking,
1148+
Status: corev1.ConditionFalse,
1149+
})
1150+
1151+
// Remove BundleUnpackFailed condition from subscriptions
1152+
o.removeSubsCond(subs, v1alpha1.SubscriptionBundleUnpackFailed)
1153+
10651154
// Remove resolutionfailed condition from subscriptions
1066-
subs = o.removeSubsCond(subs, v1alpha1.SubscriptionResolutionFailed)
1155+
o.removeSubsCond(subs, v1alpha1.SubscriptionResolutionFailed)
10671156
newSub := true
10681157
for _, updatedSub := range updatedSubs {
10691158
updatedSub.Status.RemoveConditions(v1alpha1.SubscriptionResolutionFailed)
@@ -1408,19 +1497,27 @@ type UnpackedBundleReference struct {
14081497
Properties string `json:"properties"`
14091498
}
14101499

1411-
func (o *Operator) unpackBundles(plan *v1alpha1.InstallPlan, unpackTimeout time.Duration) (bool, *v1alpha1.InstallPlan, error) {
1412-
out := plan.DeepCopy()
1500+
func (o *Operator) unpackBundles(namespace string, installPlanSteps []*v1alpha1.Step, bundleLookups []v1alpha1.BundleLookup, unpackTimeout time.Duration) (bool, []*v1alpha1.Step, []v1alpha1.BundleLookup, error) {
14131501
unpacked := true
14141502

1503+
outBundleLookups := make([]v1alpha1.BundleLookup, len(bundleLookups))
1504+
for i := range bundleLookups {
1505+
bundleLookups[i].DeepCopyInto(&outBundleLookups[i])
1506+
}
1507+
outInstallPlanSteps := make([]*v1alpha1.Step, len(installPlanSteps))
1508+
for i := range installPlanSteps {
1509+
outInstallPlanSteps[i] = installPlanSteps[i].DeepCopy()
1510+
}
1511+
14151512
var errs []error
1416-
for i := 0; i < len(out.Status.BundleLookups); i++ {
1417-
lookup := out.Status.BundleLookups[i]
1513+
for i := 0; i < len(outBundleLookups); i++ {
1514+
lookup := outBundleLookups[i]
14181515
res, err := o.bundleUnpacker.UnpackBundle(&lookup, unpackTimeout)
14191516
if err != nil {
14201517
errs = append(errs, err)
14211518
continue
14221519
}
1423-
out.Status.BundleLookups[i] = *res.BundleLookup
1520+
outBundleLookups[i] = *res.BundleLookup
14241521

14251522
// if the failed condition is present it means the bundle unpacking has failed
14261523
failedCondition := res.GetCondition(v1alpha1.BundleLookupFailed)
@@ -1442,11 +1539,11 @@ func (o *Operator) unpackBundles(plan *v1alpha1.InstallPlan, unpackTimeout time.
14421539
continue
14431540
}
14441541

1445-
// Ensure that bundle can be applied by the current version of OLM by converting to steps
1446-
steps, err := resolver.NewStepsFromBundle(res.Bundle(), out.GetNamespace(), res.Replaces, res.CatalogSourceRef.Name, res.CatalogSourceRef.Namespace)
1542+
// Ensure that bundle can be applied by the current version of OLM by converting to bundleSteps
1543+
bundleSteps, err := resolver.NewStepsFromBundle(res.Bundle(), namespace, res.Replaces, res.CatalogSourceRef.Name, res.CatalogSourceRef.Namespace)
14471544
if err != nil {
14481545
if fatal := olmerrors.IsFatal(err); fatal {
1449-
return false, nil, err
1546+
return false, nil, nil, err
14501547
}
14511548

14521549
errs = append(errs, fmt.Errorf("failed to turn bundle into steps: %v", err))
@@ -1455,7 +1552,7 @@ func (o *Operator) unpackBundles(plan *v1alpha1.InstallPlan, unpackTimeout time.
14551552
}
14561553

14571554
// step manifests are replaced with references to the configmap containing them
1458-
for i, s := range steps {
1555+
for i, s := range bundleSteps {
14591556
ref := UnpackedBundleReference{
14601557
Kind: "ConfigMap",
14611558
Namespace: res.CatalogSourceRef.Namespace,
@@ -1472,19 +1569,19 @@ func (o *Operator) unpackBundles(plan *v1alpha1.InstallPlan, unpackTimeout time.
14721569
continue
14731570
}
14741571
s.Resource.Manifest = string(r)
1475-
steps[i] = s
1572+
bundleSteps[i] = s
14761573
}
14771574
res.RemoveCondition(resolver.BundleLookupConditionPacked)
1478-
out.Status.BundleLookups[i] = *res.BundleLookup
1479-
out.Status.Plan = append(out.Status.Plan, steps...)
1575+
outBundleLookups[i] = *res.BundleLookup
1576+
outInstallPlanSteps = append(outInstallPlanSteps, bundleSteps...)
14801577
}
14811578

14821579
if err := utilerrors.NewAggregate(errs); err != nil {
14831580
o.logger.Debugf("failed to unpack bundles: %v", err)
1484-
return false, nil, err
1581+
return false, nil, nil, err
14851582
}
14861583

1487-
return unpacked, out, nil
1584+
return unpacked, outInstallPlanSteps, outBundleLookups, nil
14881585
}
14891586

14901587
// gcInstallPlans garbage collects installplans that are too old
@@ -1631,71 +1728,6 @@ func (o *Operator) syncInstallPlans(obj interface{}) (syncError error) {
16311728
}
16321729
}
16331730

1634-
ogLister := o.lister.OperatorsV1().OperatorGroupLister().OperatorGroups(plan.GetNamespace())
1635-
unpackTimeout, err := bundle.OperatorGroupBundleUnpackTimeout(ogLister)
1636-
if err != nil {
1637-
return err
1638-
}
1639-
1640-
// Attempt to unpack bundles before installing
1641-
// Note: This should probably use the attenuated client to prevent users from resolving resources they otherwise don't have access to.
1642-
if len(plan.Status.BundleLookups) > 0 {
1643-
unpacked, out, err := o.unpackBundles(plan, unpackTimeout)
1644-
if err != nil {
1645-
// If the error was fatal capture and fail
1646-
if fatal := olmerrors.IsFatal(err); fatal {
1647-
if err := o.transitionInstallPlanToFailed(plan, logger, v1alpha1.InstallPlanReasonInstallCheckFailed, err.Error()); err != nil {
1648-
// retry for failure to update status
1649-
syncError = err
1650-
return
1651-
}
1652-
}
1653-
// Retry sync if non-fatal error
1654-
syncError = fmt.Errorf("bundle unpacking failed: %v", err)
1655-
return
1656-
}
1657-
1658-
if !reflect.DeepEqual(plan.Status, out.Status) {
1659-
logger.Warnf("status not equal, updating...")
1660-
if _, err := o.client.OperatorsV1alpha1().InstallPlans(out.GetNamespace()).UpdateStatus(context.TODO(), out, metav1.UpdateOptions{}); err != nil {
1661-
syncError = fmt.Errorf("failed to update installplan bundle lookups: %v", err)
1662-
}
1663-
1664-
return
1665-
}
1666-
1667-
// Check BundleLookup status conditions to see if the BundleLookupPending condition is false
1668-
// which means bundle lookup has failed and the InstallPlan should be failed as well
1669-
isFailed, cond := hasBundleLookupFailureCondition(plan)
1670-
if isFailed {
1671-
err := fmt.Errorf("bundle unpacking failed. Reason: %v, and Message: %v", cond.Reason, cond.Message)
1672-
// Mark the InstallPlan as failed for a fatal bundle unpack error
1673-
logger.Infof("%v", err)
1674-
1675-
if err := o.transitionInstallPlanToFailed(plan, logger, v1alpha1.InstallPlanReasonInstallCheckFailed, err.Error()); err != nil {
1676-
// retry for failure to update status
1677-
syncError = err
1678-
return
1679-
}
1680-
1681-
// Requeue subscription to propagate SubscriptionInstallPlanFailed condition to subscription
1682-
o.requeueSubscriptionForInstallPlan(plan, logger)
1683-
return
1684-
}
1685-
1686-
// TODO: Remove in favor of job and configmap informer requeuing
1687-
if !unpacked {
1688-
err := o.ipQueueSet.RequeueAfter(plan.GetNamespace(), plan.GetName(), 5*time.Second)
1689-
if err != nil {
1690-
syncError = err
1691-
return
1692-
}
1693-
logger.Debug("install plan not yet populated from bundle image, requeueing")
1694-
1695-
return
1696-
}
1697-
}
1698-
16991731
outInstallPlan, syncError := transitionInstallPlanState(logger.Logger, o, *plan, o.now(), o.installPlanTimeout)
17001732

17011733
if syncError != nil {
@@ -1723,8 +1755,8 @@ func (o *Operator) syncInstallPlans(obj interface{}) (syncError error) {
17231755
return
17241756
}
17251757

1726-
func hasBundleLookupFailureCondition(plan *v1alpha1.InstallPlan) (bool, *v1alpha1.BundleLookupCondition) {
1727-
for _, bundleLookup := range plan.Status.BundleLookups {
1758+
func hasBundleLookupFailureCondition(bundleLookups []v1alpha1.BundleLookup) (bool, *v1alpha1.BundleLookupCondition) {
1759+
for _, bundleLookup := range bundleLookups {
17281760
for _, cond := range bundleLookup.Conditions {
17291761
if cond.Type == v1alpha1.BundleLookupFailed && cond.Status == corev1.ConditionTrue {
17301762
return true, &cond
@@ -1734,27 +1766,6 @@ func hasBundleLookupFailureCondition(plan *v1alpha1.InstallPlan) (bool, *v1alpha
17341766
return false, nil
17351767
}
17361768

1737-
func (o *Operator) transitionInstallPlanToFailed(plan *v1alpha1.InstallPlan, logger logrus.FieldLogger, reason v1alpha1.InstallPlanConditionReason, message string) error {
1738-
now := o.now()
1739-
out := plan.DeepCopy()
1740-
out.Status.SetCondition(v1alpha1.ConditionFailed(v1alpha1.InstallPlanInstalled,
1741-
reason, message, &now))
1742-
out.Status.Phase = v1alpha1.InstallPlanPhaseFailed
1743-
1744-
logger.Info("transitioning InstallPlan to failed")
1745-
_, err := o.client.OperatorsV1alpha1().InstallPlans(plan.GetNamespace()).UpdateStatus(context.TODO(), out, metav1.UpdateOptions{})
1746-
if err == nil {
1747-
return nil
1748-
}
1749-
1750-
updateErr := errors.New("error updating InstallPlan status: " + err.Error())
1751-
logger = logger.WithField("updateError", updateErr)
1752-
logger.Errorf("error transitioning InstallPlan to failed")
1753-
1754-
// retry sync with error to update InstallPlan status
1755-
return fmt.Errorf("installplan failed: %s and error updating InstallPlan status as failed: %s", message, updateErr)
1756-
}
1757-
17581769
func (o *Operator) requeueSubscriptionForInstallPlan(plan *v1alpha1.InstallPlan, logger *logrus.Entry) {
17591770
// Notify subscription loop of installplan changes
17601771
owners := ownerutil.GetOwnersByKind(plan, v1alpha1.SubscriptionKind)

0 commit comments

Comments
 (0)