Skip to content

Commit 4956334

Browse files
committed
cleanup old jobs
Signed-off-by: Ankita Thomas <[email protected]>
1 parent 66ec506 commit 4956334

File tree

5 files changed

+154
-34
lines changed

5 files changed

+154
-34
lines changed

pkg/controller/bundle/bundle_unpacker.go

+45-20
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"crypto/sha256"
66
"fmt"
7+
"sort"
78
"strings"
89
"time"
910

@@ -671,36 +672,29 @@ func (c *ConfigMapUnpacker) ensureJob(cmRef *corev1.ObjectReference, bundlePath
671672

672673
// only check for retries if an unpackRetryInterval is specified
673674
if unpackRetryInterval > 0 {
674-
if failedCond, isFailed := getCondition(job, batchv1.JobFailed); isFailed {
675-
lastFailureTime := failedCond.LastTransitionTime.Time
675+
maxRetainedJobs := 5 // TODO: make this configurable
676+
if _, isFailed := getCondition(job, batchv1.JobFailed); isFailed {
676677
// Look for other unpack jobs for the same bundle
677-
var jobs []*batchv1.Job
678+
var jobs, toDelete []*batchv1.Job
678679
jobs, err = c.jobLister.Jobs(fresh.GetNamespace()).List(k8slabels.ValidatedSetSelector{bundleUnpackRefLabel: cmRef.Name})
679-
if err != nil {
680+
if err != nil || len(jobs) == 0 {
680681
return
681682
}
682683

683-
var failed bool
684-
var cond *batchv1.JobCondition
685-
for _, j := range jobs {
686-
cond, failed = getCondition(j, batchv1.JobFailed)
687-
if !failed {
688-
// found an in-progress unpack attempt
689-
job = j
690-
break
691-
}
692-
if cond != nil && lastFailureTime.Before(cond.LastTransitionTime.Time) {
693-
lastFailureTime = cond.LastTransitionTime.Time
694-
}
695-
}
684+
job, toDelete = sortUnpackJobs(jobs, maxRetainedJobs)
696685

697-
if failed {
698-
if time.Now().After(lastFailureTime.Add(unpackRetryInterval)) {
686+
if cond, failed := getCondition(job, batchv1.JobFailed); failed {
687+
if time.Now().After(cond.LastTransitionTime.Time.Add(unpackRetryInterval)) {
699688
fresh.SetName(names.SimpleNameGenerator.GenerateName(fresh.GetName()))
700689
job, err = c.client.BatchV1().Jobs(fresh.GetNamespace()).Create(context.TODO(), fresh, metav1.CreateOptions{})
701690
}
702-
return
703691
}
692+
693+
// cleanup old failed jobs, but don't clean up successful jobs to avoid repeat unpacking
694+
for _, j := range toDelete {
695+
_ = c.client.BatchV1().Jobs(j.GetNamespace()).Delete(context.TODO(), j.GetName(), metav1.DeleteOptions{})
696+
}
697+
return
704698
}
705699
}
706700

@@ -845,6 +839,37 @@ func getCondition(job *batchv1.Job, conditionType batchv1.JobConditionType) (con
845839
return
846840
}
847841

842+
func sortUnpackJobs(jobs []*batchv1.Job, maxRetainedJobs int) (latest *batchv1.Job, toDelete []*batchv1.Job) {
843+
if len(jobs) == 0 {
844+
return
845+
}
846+
// sort jobs so that latest job is first
847+
// with preference for non-failed jobs
848+
sort.Slice(jobs, func(i, j int) bool {
849+
condI, failedI := getCondition(jobs[i], batchv1.JobFailed)
850+
condJ, failedJ := getCondition(jobs[j], batchv1.JobFailed)
851+
if failedI != failedJ {
852+
return !failedI // non-failed job goes first
853+
}
854+
return condI.LastTransitionTime.After(condJ.LastTransitionTime.Time)
855+
})
856+
latest = jobs[0]
857+
if len(jobs) <= maxRetainedJobs {
858+
return
859+
}
860+
if maxRetainedJobs == 0 {
861+
toDelete = jobs[1:]
862+
return
863+
}
864+
865+
// cleanup old failed jobs, n-1 recent jobs and the oldest job
866+
for i := 0; i < maxRetainedJobs && i+maxRetainedJobs < len(jobs); i++ {
867+
toDelete = append(toDelete, jobs[maxRetainedJobs+i])
868+
}
869+
870+
return
871+
}
872+
848873
// OperatorGroupBundleUnpackTimeout returns bundle timeout from annotation if specified.
849874
// If the timeout annotation is not set, return timeout < 0 which is subsequently ignored.
850875
// This is to overrides the --bundle-unpack-timeout flag value on per-OperatorGroup basis.

pkg/controller/bundle/bundle_unpacker_test.go

+97
Original file line numberDiff line numberDiff line change
@@ -1951,3 +1951,100 @@ func TestOperatorGroupBundleUnpackRetryInterval(t *testing.T) {
19511951
})
19521952
}
19531953
}
1954+
1955+
func TestSortUnpackJobs(t *testing.T) {
1956+
// if there is a non-failed job, it should be first
1957+
// otherwise, the latest job should be first
1958+
//first n-1 jobs and oldest job are preserved
1959+
testJob := func(name string, failed bool, ts int64) *batchv1.Job {
1960+
conditions := []batchv1.JobCondition{}
1961+
if failed {
1962+
conditions = append(conditions, batchv1.JobCondition{
1963+
Type: batchv1.JobFailed,
1964+
Status: corev1.ConditionTrue,
1965+
LastTransitionTime: metav1.Time{Time: time.Unix(ts, 0)},
1966+
})
1967+
}
1968+
return &batchv1.Job{
1969+
ObjectMeta: metav1.ObjectMeta{
1970+
Name: name,
1971+
},
1972+
Status: batchv1.JobStatus{
1973+
Conditions: conditions,
1974+
},
1975+
}
1976+
}
1977+
failedJobs := []*batchv1.Job{
1978+
testJob("f-1", true, 1),
1979+
testJob("f-2", true, 2),
1980+
testJob("f-3", true, 3),
1981+
testJob("f-4", true, 4),
1982+
testJob("f-5", true, 5),
1983+
}
1984+
nonFailedJob := testJob("s-1", false, 1)
1985+
for _, tc := range []struct {
1986+
name string
1987+
jobs []*batchv1.Job
1988+
maxRetained int
1989+
expectedLatest *batchv1.Job
1990+
expectedToDelete []*batchv1.Job
1991+
}{
1992+
{
1993+
name: "no job history",
1994+
maxRetained: 0,
1995+
jobs: []*batchv1.Job{
1996+
failedJobs[1],
1997+
failedJobs[2],
1998+
failedJobs[0],
1999+
},
2000+
expectedLatest: failedJobs[2],
2001+
expectedToDelete: []*batchv1.Job{
2002+
failedJobs[1],
2003+
failedJobs[0],
2004+
},
2005+
}, {
2006+
name: "empty job list",
2007+
maxRetained: 1,
2008+
}, {
2009+
name: "retain oldest",
2010+
maxRetained: 1,
2011+
jobs: []*batchv1.Job{
2012+
failedJobs[2],
2013+
failedJobs[0],
2014+
failedJobs[1],
2015+
},
2016+
expectedToDelete: []*batchv1.Job{
2017+
failedJobs[1],
2018+
},
2019+
expectedLatest: failedJobs[2],
2020+
}, {
2021+
name: "multiple old jobs",
2022+
maxRetained: 2,
2023+
jobs: []*batchv1.Job{
2024+
failedJobs[1],
2025+
failedJobs[0],
2026+
failedJobs[2],
2027+
failedJobs[3],
2028+
failedJobs[4],
2029+
},
2030+
expectedLatest: failedJobs[4],
2031+
expectedToDelete: []*batchv1.Job{
2032+
failedJobs[1],
2033+
failedJobs[2],
2034+
},
2035+
}, {
2036+
name: "select non-failed as latest",
2037+
maxRetained: 3,
2038+
jobs: []*batchv1.Job{
2039+
failedJobs[0],
2040+
failedJobs[1],
2041+
nonFailedJob,
2042+
},
2043+
expectedLatest: nonFailedJob,
2044+
},
2045+
} {
2046+
latest, toDelete := sortUnpackJobs(tc.jobs, tc.maxRetained)
2047+
assert.Equal(t, tc.expectedLatest, latest)
2048+
assert.ElementsMatch(t, tc.expectedToDelete, toDelete)
2049+
}
2050+
}

pkg/controller/operators/catalog/operator.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -1669,7 +1669,7 @@ type UnpackedBundleReference struct {
16691669
Properties string `json:"properties"`
16701670
}
16711671

1672-
func (o *Operator) unpackBundles(namespace string, installPlanSteps []*v1alpha1.Step, bundleLookups []v1alpha1.BundleLookup, unpackTimeout, minUnpackRetryInterval time.Duration) (bool, []*v1alpha1.Step, []v1alpha1.BundleLookup, error) {
1672+
func (o *Operator) unpackBundles(namespace string, installPlanSteps []*v1alpha1.Step, bundleLookups []v1alpha1.BundleLookup, unpackTimeout, unpackRetryInterval time.Duration) (bool, []*v1alpha1.Step, []v1alpha1.BundleLookup, error) {
16731673
unpacked := true
16741674

16751675
outBundleLookups := make([]v1alpha1.BundleLookup, len(bundleLookups))
@@ -1684,7 +1684,7 @@ func (o *Operator) unpackBundles(namespace string, installPlanSteps []*v1alpha1.
16841684
var errs []error
16851685
for i := 0; i < len(outBundleLookups); i++ {
16861686
lookup := outBundleLookups[i]
1687-
res, err := o.bundleUnpacker.UnpackBundle(&lookup, unpackTimeout, minUnpackRetryInterval)
1687+
res, err := o.bundleUnpacker.UnpackBundle(&lookup, unpackTimeout, unpackRetryInterval)
16881688
if err != nil {
16891689
errs = append(errs, err)
16901690
continue

test/e2e/registry.go

-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@ func createDockerRegistry(client operatorclient.ClientInterface, namespace strin
5454
Port: int32(5000),
5555
},
5656
},
57-
Type: corev1.ServiceTypeNodePort,
5857
},
5958
}
6059

test/e2e/subscription_e2e_test.go

+10-11
Original file line numberDiff line numberDiff line change
@@ -2526,6 +2526,11 @@ var _ = Describe("Subscription", func() {
25262526
})
25272527
When("bundle unpack retries are enabled", func() {
25282528
It("should retry failing unpack jobs", func() {
2529+
if ok, err := inKind(c); ok && err == nil {
2530+
Skip("This spec fails when run using KIND cluster. See https://github.com/operator-framework/operator-lifecycle-manager/issues/2420 for more details")
2531+
} else if err != nil {
2532+
Skip("Could not determine whether running in a kind cluster. Skipping.")
2533+
}
25292534
By("Ensuring a registry to host bundle images")
25302535
local, err := Local(c)
25312536
Expect(err).NotTo(HaveOccurred(), "cannot determine if test running locally or on CI: %s", err)
@@ -2581,20 +2586,14 @@ var _ = Describe("Subscription", func() {
25812586
}
25822587
}
25832588

2584-
// testImage is the name of the image used throughout the test - the image overwritten by skopeo
2585-
// the tag is generated randomly and appended to the end of the testImage
2589+
// The remote image to be copied onto the local registry
25862590
srcImage := "quay.io/olmtest/example-operator-bundle:"
25872591
srcTag := "0.1.0"
2588-
bundleImage := fmt.Sprint(registryURL, "/unpack-retry-bundle", ":")
2592+
2593+
// on-cluster image ref
2594+
bundleImage := registryURL + "/unpack-retry-bundle:"
25892595
bundleTag := genName("x")
2590-
//// hash hashes data with sha256 and returns the hex string.
2591-
//func hash(data string) string {
2592-
// // A SHA256 hash is 64 characters, which is within the 253 character limit for kube resource names
2593-
// h := fmt.Sprintf("%x", sha256.Sum256([]byte(data)))
2594-
//
2595-
// // Make the hash 63 characters instead to comply with the 63 character limit for labels
2596-
// return fmt.Sprintf(h[:len(h)-1])
2597-
//}
2596+
25982597
unpackRetryCatalog := fmt.Sprintf(`
25992598
schema: olm.package
26002599
name: unpack-retry-package

0 commit comments

Comments
 (0)