From 5e552c76274c3568e5d9a20954adc7fdeeda3079 Mon Sep 17 00:00:00 2001 From: David Grove Date: Mon, 10 Mar 2025 10:08:05 -0400 Subject: [PATCH 1/7] remove appwrapper external framework integration --- .github/workflows/CI-standalone.yaml | 2 +- api/v1beta2/groupversion_info.go | 3 + config/default/config.yaml | 2 - config/dev/config.yaml | 2 - config/rbac/role.yaml | 52 ----- config/standalone/config.yaml | 14 -- config/standalone/kustomization.yaml | 37 ---- config/standalone/manager_config_patch.yaml | 10 - config/standalone/manager_webhook_patch.yaml | 23 --- config/standalone/metrics_service.yaml | 13 -- go.mod | 90 ++++----- go.sum | 190 +++++++++--------- .../appwrapper/appwrapper_controller.go | 2 +- .../controller/appwrapper/fixtures_test.go | 2 +- .../workload/workload_controller.go | 132 ------------ internal/webhook/appwrapper_fixtures_test.go | 2 +- internal/webhook/appwrapper_webhook.go | 49 ++--- internal/webhook/appwrapper_webhook_test.go | 11 - internal/webhook/suite_test.go | 3 - pkg/config/config.go | 38 +--- pkg/controller/setup.go | 20 -- test/e2e/util_test.go | 2 +- 22 files changed, 165 insertions(+), 534 deletions(-) delete mode 100644 config/standalone/config.yaml delete mode 100644 config/standalone/kustomization.yaml delete mode 100644 config/standalone/manager_config_patch.yaml delete mode 100644 config/standalone/manager_webhook_patch.yaml delete mode 100644 config/standalone/metrics_service.yaml delete mode 100644 internal/controller/workload/workload_controller.go diff --git a/.github/workflows/CI-standalone.yaml b/.github/workflows/CI-standalone.yaml index 0906690..abb5aba 100644 --- a/.github/workflows/CI-standalone.yaml +++ b/.github/workflows/CI-standalone.yaml @@ -56,7 +56,7 @@ jobs: - name: Deploy AppWrapper controller run: | make kind-push -e GIT_BRANCH=${{ env.GIT_BRANCH }} TAG=${{ env.GIT_BRANCH }}-${{ env.TAG }} - make deploy -e GIT_BRANCH=${{ env.GIT_BRANCH }} TAG=${{ env.GIT_BRANCH }}-${{ env.TAG }} ENV=standalone + make 
deploy -e GIT_BRANCH=${{ env.GIT_BRANCH }} TAG=${{ env.GIT_BRANCH }}-${{ env.TAG }} ENV=default - name: Run E2E tests run: LABEL_FILTER="Metrics,Standalone,Webhook" ./hack/run-tests-on-cluster.sh diff --git a/api/v1beta2/groupversion_info.go b/api/v1beta2/groupversion_info.go index cd8fa95..c172105 100644 --- a/api/v1beta2/groupversion_info.go +++ b/api/v1beta2/groupversion_info.go @@ -28,6 +28,9 @@ var ( // GroupVersion is group version used to register these objects GroupVersion = schema.GroupVersion{Group: "workload.codeflare.dev", Version: "v1beta2"} + // AppWrapperKind is the kind name + AppWrapperKind = "AppWrapper" + // SchemeBuilder is used to add go types to the GroupVersionKind scheme SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} diff --git a/config/default/config.yaml b/config/default/config.yaml index ba203c7..8dd597c 100644 --- a/config/default/config.yaml +++ b/config/default/config.yaml @@ -4,8 +4,6 @@ metadata: name: operator-config data: config.yaml: | - appwrapper: - enableKueueIntegrations: true controllerManager: health: bindAddress: ":8081" diff --git a/config/dev/config.yaml b/config/dev/config.yaml index 6c04c30..c21bb86 100644 --- a/config/dev/config.yaml +++ b/config/dev/config.yaml @@ -4,8 +4,6 @@ metadata: name: operator-config data: config.yaml: | - appwrapper: - enableKueueIntegrations: false controllerManager: health: bindAddress: "localhost:0" diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index ddd7946..54d6a5a 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -4,15 +4,6 @@ kind: ClusterRole metadata: name: manager-role rules: -- apiGroups: - - "" - resources: - - events - verbs: - - create - - patch - - update - - watch - apiGroups: - "" resources: @@ -124,41 +115,6 @@ rules: - patch - update - watch -- apiGroups: - - kueue.x-k8s.io - resources: - - resourceflavors - - workloadpriorityclasses - verbs: - - get - - list - - watch -- apiGroups: - - kueue.x-k8s.io - resources: - - workloads - 
verbs: - - create - - delete - - get - - list - - patch - - update - - watch -- apiGroups: - - kueue.x-k8s.io - resources: - - workloads/finalizers - verbs: - - update -- apiGroups: - - kueue.x-k8s.io - resources: - - workloads/status - verbs: - - get - - patch - - update - apiGroups: - ray.io resources: @@ -172,14 +128,6 @@ rules: - patch - update - watch -- apiGroups: - - scheduling.k8s.io - resources: - - priorityclasses - verbs: - - get - - list - - watch - apiGroups: - scheduling.sigs.k8s.io - scheduling.x-k8s.io diff --git a/config/standalone/config.yaml b/config/standalone/config.yaml deleted file mode 100644 index ec00bae..0000000 --- a/config/standalone/config.yaml +++ /dev/null @@ -1,14 +0,0 @@ -kind: ConfigMap -apiVersion: v1 -metadata: - name: operator-config -data: - config.yaml: | - appwrapper: - enableKueueIntegrations: false - controllerManager: - health: - bindAddress: ":8081" - metrics: - bindAddress: ":8443" - leaderElection: true diff --git a/config/standalone/kustomization.yaml b/config/standalone/kustomization.yaml deleted file mode 100644 index a10ffbb..0000000 --- a/config/standalone/kustomization.yaml +++ /dev/null @@ -1,37 +0,0 @@ -# Adds namespace to all resources. -namespace: appwrapper-system - -# Value of this field is prepended to the -# names of all resources, e.g. a deployment named -# "wordpress" becomes "alices-wordpress". -# Note that it should also match with the prefix (text before '-') of the namespace -# field above. 
-namePrefix: appwrapper- - -labels: -- pairs: - app.kubernetes.io/name: appwrapper - app.kubernetes.io/component: controller - includeTemplates: true -- pairs: - control-plane: controller-manager - includeSelectors: true - -resources: -- config.yaml -- ../crd -- ../rbac -- ../manager -- ../internalcert -# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in -# crd/kustomization.yaml -- ../webhook -# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. -#- ../prometheus -# [METRICS] Expose the controller manager metrics service. -- metrics_service.yaml - -patches: -# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in -# crd/kustomization.yaml -- path: manager_webhook_patch.yaml diff --git a/config/standalone/manager_config_patch.yaml b/config/standalone/manager_config_patch.yaml deleted file mode 100644 index f6f5891..0000000 --- a/config/standalone/manager_config_patch.yaml +++ /dev/null @@ -1,10 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: controller-manager - namespace: system -spec: - template: - spec: - containers: - - name: manager diff --git a/config/standalone/manager_webhook_patch.yaml b/config/standalone/manager_webhook_patch.yaml deleted file mode 100644 index 738de35..0000000 --- a/config/standalone/manager_webhook_patch.yaml +++ /dev/null @@ -1,23 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: controller-manager - namespace: system -spec: - template: - spec: - containers: - - name: manager - ports: - - containerPort: 9443 - name: webhook-server - protocol: TCP - volumeMounts: - - mountPath: /tmp/k8s-webhook-server/serving-certs - name: cert - readOnly: true - volumes: - - name: cert - secret: - defaultMode: 420 - secretName: webhook-server-cert diff --git a/config/standalone/metrics_service.yaml b/config/standalone/metrics_service.yaml deleted file mode 100644 index 728bb35..0000000 --- 
a/config/standalone/metrics_service.yaml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: controller-manager-metrics-service - namespace: system -spec: - ports: - - name: https - port: 8443 - protocol: TCP - targetPort: 8443 - selector: - control-plane: controller-manager diff --git a/go.mod b/go.mod index c7cd5a7..aac3a3b 100644 --- a/go.mod +++ b/go.mod @@ -3,28 +3,29 @@ module github.com/project-codeflare/appwrapper go 1.23.0 require ( - github.com/distribution/reference v0.5.0 + github.com/distribution/reference v0.6.0 github.com/go-logr/logr v1.4.2 github.com/golangci/golangci-lint v1.63.4 - github.com/kubeflow/training-operator v1.8.1 - github.com/onsi/ginkgo/v2 v2.22.0 - github.com/onsi/gomega v1.36.1 + github.com/kubeflow/training-operator v1.9.0 + github.com/onsi/ginkgo/v2 v2.23.0 + github.com/onsi/gomega v1.36.2 github.com/open-policy-agent/cert-controller v0.12.0 - github.com/prometheus/client_golang v1.20.5 + github.com/prometheus/client_golang v1.21.1 go.uber.org/zap v1.27.0 - k8s.io/api v0.31.4 - k8s.io/apimachinery v0.31.4 - k8s.io/client-go v0.31.4 - k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 - sigs.k8s.io/controller-runtime v0.19.3 + k8s.io/api v0.32.2 + k8s.io/apimachinery v0.32.2 + k8s.io/client-go v0.32.2 + k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 + sigs.k8s.io/controller-runtime v0.20.2 sigs.k8s.io/controller-tools v0.16.5 - sigs.k8s.io/jobset v0.7.1 - sigs.k8s.io/kueue v0.10.2 + sigs.k8s.io/jobset v0.8.0 + sigs.k8s.io/kueue v0.11.0-devel.0.20250310124347-c663d0b0faeb sigs.k8s.io/kustomize/kustomize/v5 v5.5.0 sigs.k8s.io/yaml v1.4.0 ) require ( + cel.dev/expr v0.18.0 // indirect github.com/antlr4-go/antlr/v4 v4.13.0 // indirect github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect github.com/beorn7/perks v1.0.1 // indirect @@ -33,7 +34,7 @@ require ( github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect 
github.com/emicklei/go-restful/v3 v3.12.1 // indirect - github.com/evanphx/json-patch/v5 v5.9.0 // indirect + github.com/evanphx/json-patch/v5 v5.9.11 // indirect github.com/fatih/color v1.18.0 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/fsnotify/fsnotify v1.8.0 // indirect @@ -47,23 +48,21 @@ require ( github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/gobuffalo/flect v1.0.3 // indirect github.com/gogo/protobuf v1.3.2 // indirect - github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.4 // indirect - github.com/google/cel-go v0.20.1 // indirect - github.com/google/gnostic-models v0.6.8 // indirect - github.com/google/go-cmp v0.6.0 // indirect + github.com/google/btree v1.1.3 // indirect + github.com/google/cel-go v0.22.0 // indirect + github.com/google/gnostic-models v0.6.9 // indirect + github.com/google/go-cmp v0.7.0 // indirect github.com/google/gofuzz v1.2.0 // indirect - github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db // indirect + github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad // indirect github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect github.com/google/uuid v1.6.0 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 // indirect - github.com/imdario/mergo v0.3.16 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect - github.com/klauspost/compress v1.17.9 // indirect - github.com/kubeflow/mpi-operator v0.6.0 // indirect - github.com/mailru/easyjson v0.7.7 // indirect + github.com/klauspost/compress v1.17.11 // indirect + github.com/mailru/easyjson v0.9.0 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.20 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect @@ -73,11 +72,11 @@ require ( github.com/opencontainers/go-digest 
v1.0.0 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/prometheus/client_model v0.6.1 // indirect - github.com/prometheus/common v0.57.0 // indirect + github.com/prometheus/common v0.62.0 // indirect github.com/prometheus/procfs v0.15.1 // indirect github.com/sirupsen/logrus v1.9.3 // indirect - github.com/spf13/cobra v1.8.1 // indirect - github.com/spf13/pflag v1.0.5 // indirect + github.com/spf13/cobra v1.9.1 // indirect + github.com/spf13/pflag v1.0.6 // indirect github.com/stoewer/go-strcase v1.3.0 // indirect github.com/x448/float16 v0.8.4 // indirect github.com/xlab/treeprint v1.2.0 // indirect @@ -92,33 +91,32 @@ require ( go.uber.org/atomic v1.11.0 // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 // indirect - golang.org/x/mod v0.22.0 // indirect - golang.org/x/net v0.33.0 // indirect - golang.org/x/oauth2 v0.21.0 // indirect - golang.org/x/sync v0.10.0 // indirect - golang.org/x/sys v0.28.0 // indirect - golang.org/x/term v0.27.0 // indirect - golang.org/x/text v0.21.0 // indirect - golang.org/x/time v0.6.0 // indirect - golang.org/x/tools v0.28.0 // indirect - gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20240528184218-531527333157 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094 // indirect + golang.org/x/mod v0.23.0 // indirect + golang.org/x/net v0.35.0 // indirect + golang.org/x/oauth2 v0.24.0 // indirect + golang.org/x/sync v0.11.0 // indirect + golang.org/x/sys v0.30.0 // indirect + golang.org/x/term v0.29.0 // indirect + golang.org/x/text v0.22.0 // indirect + golang.org/x/time v0.7.0 // indirect + golang.org/x/tools v0.30.0 // indirect + gomodules.xyz/jsonpatch/v2 v2.5.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7 // indirect google.golang.org/grpc 
v1.65.0 // indirect - google.golang.org/protobuf v1.35.1 // indirect + google.golang.org/protobuf v1.36.4 // indirect gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/apiextensions-apiserver v0.31.2 // indirect - k8s.io/apiserver v0.31.4 // indirect - k8s.io/component-base v0.31.4 // indirect - k8s.io/component-helpers v0.31.4 // indirect + k8s.io/apiextensions-apiserver v0.32.1 // indirect + k8s.io/apiserver v0.32.2 // indirect + k8s.io/component-base v0.32.2 // indirect k8s.io/klog/v2 v2.130.1 // indirect - k8s.io/kube-openapi v0.0.0-20240812233141-91dab695df6f // indirect - sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.30.3 // indirect - sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect + k8s.io/kube-openapi v0.0.0-20241212222426-2c72e554b1e7 // indirect + sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.0 // indirect + sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect sigs.k8s.io/kustomize/api v0.18.0 // indirect sigs.k8s.io/kustomize/kyaml v0.18.1 // indirect - sigs.k8s.io/structured-merge-diff/v4 v4.4.3 // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.6.0 // indirect ) diff --git a/go.sum b/go.sum index 6fa3055..50961b9 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,5 @@ +cel.dev/expr v0.18.0 h1:CJ6drgk+Hf96lkLikr4rFf19WrU0BOWEihyZnI2TAzo= +cel.dev/expr v0.18.0/go.mod h1:MrpN08Q+lEBs+bGYdLxxHkZoUSsCp0nSKTs0nTymJgw= github.com/antlr4-go/antlr/v4 v4.13.0 h1:lxCg3LAv+EUK6t1i0y1V6/SLeUi0eKEKdhQAlS8TVTI= github.com/antlr4-go/antlr/v4 v4.13.0/go.mod h1:pfChB/xh/Unjila75QW7+VU4TSnWnnk9UTnmpPaOR2g= github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3dyBCFEj5IhUbnKptjxatkF07cF2ak3yi77so= @@ -10,19 +12,19 @@ github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK3 github.com/cenkalti/backoff/v4 v4.3.0/go.mod 
h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/distribution/reference v0.5.0 h1:/FUIFXtfc/x2gpa5/VGfiGLuOIdYa1t65IKK2OFGvA0= -github.com/distribution/reference v0.5.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= +github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk= +github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= github.com/emicklei/go-restful/v3 v3.12.1 h1:PJMDIM/ak7btuL8Ex0iYET9hxM3CI2sjZtzpL63nKAU= github.com/emicklei/go-restful/v3 v3.12.1/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= -github.com/evanphx/json-patch v5.6.0+incompatible h1:jBYDEEiFBPxA0v50tFdvOzQQTCvpL6mnFh5mB2/l16U= -github.com/evanphx/json-patch v5.6.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= -github.com/evanphx/json-patch/v5 v5.9.0 h1:kcBlZQbplgElYIlo/n1hJbls2z/1awpXxpRi0/FOJfg= -github.com/evanphx/json-patch/v5 v5.9.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ= +github.com/evanphx/json-patch v5.9.0+incompatible h1:fBXyNpNMuTTDdquAq/uisOr2lShz4oaXpDTX2bLe7ls= +github.com/evanphx/json-patch v5.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= 
+github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU= +github.com/evanphx/json-patch/v5 v5.9.11/go.mod h1:3j+LviiESTElxA4p3EMKAB9HXj3/XEtnUf6OZxqIQTM= github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM= github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= @@ -52,32 +54,30 @@ github.com/gobuffalo/flect v1.0.3 h1:xeWBM2nui+qnVvNM4S3foBhCAL2XgPU+a7FdpelbTq4 github.com/gobuffalo/flect v1.0.3/go.mod h1:A5msMlrHtLqh9umBSnvabjsMrCcCpAyzglnDvkbYKHs= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= -github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= -github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/golangci/golangci-lint v1.63.4 h1:bJQFQ3hSfUto597dkL7ipDzOxsGEpiWdLiZ359OWOBI= github.com/golangci/golangci-lint v1.63.4/go.mod h1:Hx0B7Lg5/NXbaOHem8+KU+ZUIzMI6zNj/7tFwdnn10I= -github.com/google/cel-go v0.20.1 h1:nDx9r8S3L4pE61eDdt8igGj8rf5kjYR3ILxWIpWNi84= -github.com/google/cel-go v0.20.1/go.mod h1:kWcIzTsPX0zmQ+H3TirHstLLf9ep5QTsZBN9u4dOYLg= -github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= -github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= +github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg= +github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= +github.com/google/cel-go v0.22.0 h1:b3FJZxpiv1vTMo2/5RDUqAHPxkT8mmMfJIrq1llbf7g= 
+github.com/google/cel-go v0.22.0/go.mod h1:BuznPXXfQDpXKWQ9sPW3TzlAJN5zzFe+i9tIs0yC4s8= +github.com/google/gnostic-models v0.6.9 h1:MU/8wDLif2qCXZmzncUQ/BOfxWfthHi63KqpoNbWqVw= +github.com/google/gnostic-models v0.6.9/go.mod h1:CiWsm0s6BSQd1hRn8/QmxqB6BesYcbSZxsz9b0KuDBw= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= -github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgYQBbFN4U4JNXUNYpxael3UzMyo= -github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= +github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad h1:a6HEuzUHeKH6hwfN/ZoQgRgVIWFJljSWa/zetS2WTvg= +github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 h1:bkypFPDjIYGfCYD5mRBvpqxfYX1YCS1PXdKYWi8FsN0= github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0/go.mod h1:P+Lt/0by1T8bfcF3z737NnSbmxQAppXMRziHUxPOC8k= 
-github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= -github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= @@ -86,20 +86,18 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= -github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= +github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/kubeflow/mpi-operator v0.6.0 h1:RzzG03oyQIiHe/SH2k19N+Hp4QkxEsG2l9G1T7US+ys= -github.com/kubeflow/mpi-operator v0.6.0/go.mod h1:lu3cj3ESq3SdS29nbHC26TuXiTYYQYTHJ6fcy5Xv20c= -github.com/kubeflow/training-operator v1.8.1 h1:mVvoBSCInbMBX0gGSn4+Ihj3ycwoLO1iEAw+0qPhQfc= -github.com/kubeflow/training-operator v1.8.1/go.mod h1:T6I15h1S09ncH5C6St/QEC7Dy6dpHZA5sPFo+VoJAvE= +github.com/kubeflow/training-operator v1.9.0 h1:L+ep5YQT1Pq62O3VjW6G+IRQw+NpFlPRJJj8TgydBhQ= 
+github.com/kubeflow/training-operator v1.9.0/go.mod h1:tkXcAngbhpdskDE75smgfdqOW17tmWJ+2389+FzMNvo= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= -github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= -github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4= +github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= @@ -118,10 +116,10 @@ github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU= -github.com/onsi/ginkgo/v2 v2.22.0 h1:Yed107/8DjTr0lKCNt7Dn8yQ6ybuDRQoMGrNFKzMfHg= -github.com/onsi/ginkgo/v2 v2.22.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo= -github.com/onsi/gomega v1.36.1 h1:bJDPBO7ibjxcbHMgSCoo4Yj18UWbKDlLwX1x9sybDcw= -github.com/onsi/gomega v1.36.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog= +github.com/onsi/ginkgo/v2 v2.23.0 h1:FA1xjp8ieYDzlgS5ABTpdUDB7wtngggONc8a7ku2NqQ= +github.com/onsi/ginkgo/v2 v2.23.0/go.mod h1:zXTP6xIp3U8aVuXN8ENK9IXRaTjFnpVB9mGmaSRvxnM= +github.com/onsi/gomega v1.36.2 h1:koNYke6TVk6ZmnyHrCXba/T/MoLBXFjeC1PtvYgw0A8= +github.com/onsi/gomega v1.36.2/go.mod h1:DdwyADRjrc825LhMEkD76cHR5+pUnjhUN8GlHlRPHzY= github.com/open-policy-agent/cert-controller v0.12.0 
h1:RKXlBafMcCh+++I1geJetXo77tAjyb4542DQc/+aZIw= github.com/open-policy-agent/cert-controller v0.12.0/go.mod h1:N5bCFXdAXMYx0PdS6ZQ9lrDQQMz+F6deoChym6VleXw= github.com/open-policy-agent/frameworks/constraint v0.0.0-20241101234656-e78c8abd754a h1:gQtOJ50XFyL2Xh3lDD9zP4KQ2PY4mZKQ9hDcWc81Sp8= @@ -133,16 +131,14 @@ github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= -github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= +github.com/prometheus/client_golang v1.21.1 h1:DOvXXTqVzvkIewV/CDPFdejpMCGeMcbGCQ8YOmu+Ibk= +github.com/prometheus/client_golang v1.21.1/go.mod h1:U9NM32ykUErtVBxdvD3zfi+EuFkkaBvMb09mIfe0Zgg= github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= -github.com/prometheus/common v0.57.0 h1:Ro/rKjwdq9mZn1K5QPctzh+MA4Lp0BuYk5ZZEVhoNcY= -github.com/prometheus/common v0.57.0/go.mod h1:7uRPFSUTbfZWsJ7MHY56sqt7hLQu3bxXHDnNhl8E9qI= +github.com/prometheus/common v0.62.0 h1:xasJaQlnWAeyHdUBeGjXmutelfJHWMRr+Fg4QszZ2Io= +github.com/prometheus/common v0.62.0/go.mod h1:vyBcEuLSvWos9B1+CyL7JZ2up+uFzXhkqml0W5zIY1I= github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= -github.com/ray-project/kuberay/ray-operator v1.2.2 h1:wj4qe9SmJfD1ubgEaVPuAsnU/WFDvremzR8j3JslBdk= -github.com/ray-project/kuberay/ray-operator v1.2.2/go.mod 
h1:osTiIyaDoWi5IN1f0tOOtZ4TzVf+5kJXZor8VFvcEiI= github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= @@ -150,10 +146,10 @@ github.com/sergi/go-diff v1.2.0 h1:XU+rvMAioB0UC3q1MFrIQy4Vo5/4VsRDQQXHsEya6xQ= github.com/sergi/go-diff v1.2.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= -github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= -github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= -github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= -github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= +github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= +github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= +github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stoewer/go-strcase v1.3.0 h1:g0eASXYtp+yvN9fK8sH94oCIk0fau9uV1/ZdJ0AVEzs= github.com/stoewer/go-strcase v1.3.0/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8wodgtPmh1xo= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -205,57 +201,57 @@ golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 h1:e66Fs6Z+fZTbFBAxKfP3PALWB golang.org/x/exp v0.0.0-20240909161429-701f63a606c0/go.mod h1:2TbTHSBQa924w8M6Xs1QcRcFwyucIwBGpK1p2f1YFFY= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod 
v0.22.0 h1:D4nJWe9zXqHOmWqj4VMOJhvzj7bEZg4wEYa759z1pH4= -golang.org/x/mod v0.22.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= +golang.org/x/mod v0.23.0 h1:Zb7khfcRGKk+kqfxFaP5tZqCnDZMjC5VtUBs87Hr6QM= +golang.org/x/mod v0.23.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= -golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= -golang.org/x/oauth2 v0.21.0 h1:tsimM75w1tF/uws5rbeHzIWxEqElMehnc+iW793zsZs= -golang.org/x/oauth2 v0.21.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8= +golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk= +golang.org/x/oauth2 v0.24.0 h1:KTBBxWqUa0ykRPLtV69rRto9TLXcqYkeswu48x/gvNE= +golang.org/x/oauth2 v0.24.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= -golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= +golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= -golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.27.0 h1:WP60Sv1nlK1T6SupCHbXzSaN0b9wUmsPoRS9b61A23Q= -golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= +golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= +golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.29.0 h1:L6pJp37ocefwRRtYPKSWOWzOtWSxVajvz2ldH/xi3iU= +golang.org/x/term v0.29.0/go.mod h1:6bl4lRlvVuDgSf3179VpIxBF0o10JUpXWOnI7nErv7s= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= -golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= -golang.org/x/time v0.6.0 h1:eTDhh4ZXt5Qf0augr54TN6suAUudPcawVZeIAPU7D4U= -golang.org/x/time v0.6.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM= +golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY= +golang.org/x/time v0.7.0 h1:ntUhktv3OPE6TgYxXWv9vKvUSJyIFJlyohwbkEwPrKQ= +golang.org/x/time v0.7.0/go.mod 
h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.28.0 h1:WuB6qZ4RPCQo5aP3WdKZS7i595EdWqWR8vqJTlwTVK8= -golang.org/x/tools v0.28.0/go.mod h1:dcIOrVd3mfQKTgrDVQHqCPMWy6lnhfhtX3hLXYVLfRw= +golang.org/x/tools v0.30.0 h1:BgcpHewrV5AUp2G9MebG4XPFI1E2W41zU1SaqVA9vJY= +golang.org/x/tools v0.30.0/go.mod h1:c347cR/OJfw5TI+GfX7RUPNMdDRRbjvYTS0jPyvsVtY= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= -gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= -google.golang.org/genproto/googleapis/api v0.0.0-20240528184218-531527333157 h1:7whR9kGa5LUwFtpLm2ArCEejtnxlGeLbAyjFY8sGNFw= -google.golang.org/genproto/googleapis/api v0.0.0-20240528184218-531527333157/go.mod h1:99sLkeliLXfdj2J75X3Ho+rrVCaJze0uwN7zDDkjPVU= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094 h1:BwIjyKYGsK9dMCBOorzRri8MQwmi7mT9rGHsCEinZkA= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094/go.mod h1:Ue6ibwXGpU+dqIcODieyLOcgj7z8+IcskoNIgZxtrFY= +gomodules.xyz/jsonpatch/v2 v2.5.0 
h1:JELs8RLM12qJGXU4u/TO3V25KW8GreMKl9pdkk14RM0= +gomodules.xyz/jsonpatch/v2 v2.5.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= +google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7 h1:YcyjlL1PRr2Q17/I0dPk2JmYS5CDXfcdb2Z3YRioEbw= +google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7/go.mod h1:OCdP9MfskevB/rbYvHTsXTtKC+3bHWajPdoKgjcYkfo= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7 h1:2035KHhUv+EpyB+hWgJnaWKJOdX1E95w2S8Rr4uWKTs= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU= google.golang.org/grpc v1.65.0 h1:bs/cUb4lp1G5iImFFd3u5ixQzweKizoZJAwBNLR42lc= google.golang.org/grpc v1.65.0/go.mod h1:WgYC2ypjlB0EiQi6wdKixMqukr6lBc0Vo+oOgjrM5ZQ= -google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= -google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +google.golang.org/protobuf v1.36.4 h1:6A3ZDJHn/eNqc1i+IdefRzy/9PokBTPvcqMySR7NNIM= +google.golang.org/protobuf v1.36.4/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= @@ -270,47 +266,47 @@ gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -k8s.io/api v0.31.4 h1:I2QNzitPVsPeLQvexMEsj945QumYraqv9m74isPDKhM= -k8s.io/api v0.31.4/go.mod h1:d+7vgXLvmcdT1BCo79VEgJxHHryww3V5np2OYTr6jdw= 
-k8s.io/apiextensions-apiserver v0.31.2 h1:W8EwUb8+WXBLu56ser5IudT2cOho0gAKeTOnywBLxd0= -k8s.io/apiextensions-apiserver v0.31.2/go.mod h1:i+Geh+nGCJEGiCGR3MlBDkS7koHIIKWVfWeRFiOsUcM= -k8s.io/apimachinery v0.31.4 h1:8xjE2C4CzhYVm9DGf60yohpNUh5AEBnPxCryPBECmlM= -k8s.io/apimachinery v0.31.4/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo= -k8s.io/apiserver v0.31.4 h1:JbtnTaXVYEAYIHJil6Wd74Wif9sd8jVcBw84kwEmp7o= -k8s.io/apiserver v0.31.4/go.mod h1:JJjoTjZ9PTMLdIFq7mmcJy2B9xLN3HeAUebW6xZyIP0= -k8s.io/client-go v0.31.4 h1:t4QEXt4jgHIkKKlx06+W3+1JOwAFU/2OPiOo7H92eRQ= -k8s.io/client-go v0.31.4/go.mod h1:kvuMro4sFYIa8sulL5Gi5GFqUPvfH2O/dXuKstbaaeg= -k8s.io/component-base v0.31.4 h1:wCquJh4ul9O8nNBSB8N/o8+gbfu3BVQkVw9jAUY/Qtw= -k8s.io/component-base v0.31.4/go.mod h1:G4dgtf5BccwiDT9DdejK0qM6zTK0jwDGEKnCmb9+u/s= -k8s.io/component-helpers v0.31.4 h1:pqokuXozyWVrVBMmx0AMcKqNWqXhR00OZvpAE5hG5NM= -k8s.io/component-helpers v0.31.4/go.mod h1:Ddq5GYRK/1uNoPNgJh9N5osPutvBweQEcIG6b8kcvgQ= +k8s.io/api v0.32.2 h1:bZrMLEkgizC24G9eViHGOPbW+aRo9duEISRIJKfdJuw= +k8s.io/api v0.32.2/go.mod h1:hKlhk4x1sJyYnHENsrdCWw31FEmCijNGPJO5WzHiJ6Y= +k8s.io/apiextensions-apiserver v0.32.1 h1:hjkALhRUeCariC8DiVmb5jj0VjIc1N0DREP32+6UXZw= +k8s.io/apiextensions-apiserver v0.32.1/go.mod h1:sxWIGuGiYov7Io1fAS2X06NjMIk5CbRHc2StSmbaQto= +k8s.io/apimachinery v0.32.2 h1:yoQBR9ZGkA6Rgmhbp/yuT9/g+4lxtsGYwW6dR6BDPLQ= +k8s.io/apimachinery v0.32.2/go.mod h1:GpHVgxoKlTxClKcteaeuF1Ul/lDVb74KpZcxcmLDElE= +k8s.io/apiserver v0.32.2 h1:WzyxAu4mvLkQxwD9hGa4ZfExo3yZZaYzoYvvVDlM6vw= +k8s.io/apiserver v0.32.2/go.mod h1:PEwREHiHNU2oFdte7BjzA1ZyjWjuckORLIK/wLV5goM= +k8s.io/client-go v0.32.2 h1:4dYCD4Nz+9RApM2b/3BtVvBHw54QjMFUl1OLcJG5yOA= +k8s.io/client-go v0.32.2/go.mod h1:fpZ4oJXclZ3r2nDOv+Ux3XcJutfrwjKTCHz2H3sww94= +k8s.io/component-base v0.32.2 h1:1aUL5Vdmu7qNo4ZsE+569PV5zFatM9hl+lb3dEea2zU= +k8s.io/component-base v0.32.2/go.mod h1:PXJ61Vx9Lg+P5mS8TLd7bCIr+eMJRQTyXe8KvkrvJq0= k8s.io/klog/v2 v2.130.1 
h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= k8s.io/kube-aggregator v0.31.2 h1:Uw1zUP2D/4wiSjKWVVzSOcCGLuW/+IdRwjjC0FJooYU= k8s.io/kube-aggregator v0.31.2/go.mod h1:41/VIXH+/Qcg9ERNAY6bRF/WQR6xL1wFgYagdHac1X4= -k8s.io/kube-openapi v0.0.0-20240812233141-91dab695df6f h1:bnWtxXWdAl5bVOCEPoNdvMkyj6cTW3zxHuwKIakuV9w= -k8s.io/kube-openapi v0.0.0-20240812233141-91dab695df6f/go.mod h1:G0W3eI9gG219NHRq3h5uQaRBl4pj4ZpwzRP5ti8y770= -k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A= -k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= -sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.30.3 h1:2770sDpzrjjsAtVhSeUFseziht227YAWYHLGNM8QPwY= -sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.30.3/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw= -sigs.k8s.io/controller-runtime v0.19.3 h1:XO2GvC9OPftRst6xWCpTgBZO04S2cbp0Qqkj8bX1sPw= -sigs.k8s.io/controller-runtime v0.19.3/go.mod h1:j4j87DqtsThvwTv5/Tc5NFRyyF/RF0ip4+62tbTSIUM= +k8s.io/kube-openapi v0.0.0-20241212222426-2c72e554b1e7 h1:hcha5B1kVACrLujCKLbr8XWMxCxzQx42DY8QKYJrDLg= +k8s.io/kube-openapi v0.0.0-20241212222426-2c72e554b1e7/go.mod h1:GewRfANuJ70iYzvn+i4lezLDAFzvjxZYK1gn1lWcfas= +k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 h1:M3sRQVHv7vB20Xc2ybTt7ODCeFj6JSWYFzOFnYeS6Ro= +k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.0 h1:CPT0ExVicCzcpeN4baWEV2ko2Z/AsiZgEdwgcfwLgMo= +sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.0/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw= +sigs.k8s.io/controller-runtime v0.20.2 h1:/439OZVxoEc02psi1h4QO3bHzTgu49bb347Xp4gW1pc= +sigs.k8s.io/controller-runtime v0.20.2/go.mod h1:xg2XB0K5ShQzAgsoujxuKN4LNXR2LfwwHsPj7Iaw+XY= sigs.k8s.io/controller-tools v0.16.5 
h1:5k9FNRqziBPwqr17AMEPPV/En39ZBplLAdOwwQHruP4= sigs.k8s.io/controller-tools v0.16.5/go.mod h1:8vztuRVzs8IuuJqKqbXCSlXcw+lkAv/M2sTpg55qjMY= -sigs.k8s.io/jobset v0.7.1 h1:m28IaaKrQyZ8qa0Q7jK3U5/6TEW+27QPALjlalLP/0A= -sigs.k8s.io/jobset v0.7.1/go.mod h1:cbBuQ6QrTU88x5PrDqcC5AI28YyTSQCIcRTLNTJoHDE= -sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= -sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= -sigs.k8s.io/kueue v0.10.2 h1:p0C2zZiSCb6Dt77jCU/DMx2LFujN4etIGjjj7q+Ylao= -sigs.k8s.io/kueue v0.10.2/go.mod h1:3yzOvGI0sPOC3VL1ihVIrzc8mkSyCVTL+SrouewwRWw= +sigs.k8s.io/jobset v0.8.0 h1:80cJcPld+IMdKFOqzEW4et3Y6lGAPcP8YmBZ+aiKGYA= +sigs.k8s.io/jobset v0.8.0/go.mod h1:yitjuGOExl2p964nhyevQGIkfiPSRHcdC3zNBneKCT8= +sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 h1:/Rv+M11QRah1itp8VhT6HoVx1Ray9eB4DBr+K+/sCJ8= +sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3/go.mod h1:18nIHnGi6636UCz6m8i4DhaJ65T6EruyzmoQqI2BVDo= +sigs.k8s.io/kueue v0.11.0-devel.0.20250310124347-c663d0b0faeb h1:DHwLXuSj1NMqOIxwHhxHiXXHS1CBkIsYeU5narw4j1A= +sigs.k8s.io/kueue v0.11.0-devel.0.20250310124347-c663d0b0faeb/go.mod h1:+2mf2ItP10M/Wa5nixARFOO+ApxArSKxLGhKtmqjaRc= sigs.k8s.io/kustomize/api v0.18.0 h1:hTzp67k+3NEVInwz5BHyzc9rGxIauoXferXyjv5lWPo= sigs.k8s.io/kustomize/api v0.18.0/go.mod h1:f8isXnX+8b+SGLHQ6yO4JG1rdkZlvhaCf/uZbLVMb0U= sigs.k8s.io/kustomize/kustomize/v5 v5.5.0 h1:o1mtt6vpxsxDYaZKrw3BnEtc+pAjLz7UffnIvHNbvW0= sigs.k8s.io/kustomize/kustomize/v5 v5.5.0/go.mod h1:AeFCmgCrXzmvjWWaeZCyBp6XzG1Y0w1svYus8GhJEOE= sigs.k8s.io/kustomize/kyaml v0.18.1 h1:WvBo56Wzw3fjS+7vBjN6TeivvpbW9GmRaWZ9CIVmt4E= sigs.k8s.io/kustomize/kyaml v0.18.1/go.mod h1:C3L2BFVU1jgcddNBE1TxuVLgS46TjObMwW5FT9FcjYo= -sigs.k8s.io/structured-merge-diff/v4 v4.4.3 h1:sCP7Vv3xx/CWIuTPVN38lUPx0uw0lcLfzaiDa8Ja01A= -sigs.k8s.io/structured-merge-diff/v4 v4.4.3/go.mod h1:N8f93tFZh9U6vpxwRArLiikrE5/2tiu1w1AGfACIGE4= 
+sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016 h1:kXv6kKdoEtedwuqMmkqhbkgvYKeycVbC8+iPCP9j5kQ= +sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= +sigs.k8s.io/structured-merge-diff/v4 v4.6.0 h1:IUA9nvMmnKWcj5jl84xn+T5MnlZKThmUW1TdblaLVAc= +sigs.k8s.io/structured-merge-diff/v4 v4.6.0/go.mod h1:dDy58f92j70zLsuZVuUX5Wp9vtxXpaZnkPGWeqDfCps= sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/internal/controller/appwrapper/appwrapper_controller.go b/internal/controller/appwrapper/appwrapper_controller.go index 8f7ada1..eaec21d 100644 --- a/internal/controller/appwrapper/appwrapper_controller.go +++ b/internal/controller/appwrapper/appwrapper_controller.go @@ -910,7 +910,7 @@ func (r *AppWrapperReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). For(&awv1beta2.AppWrapper{}). Watches(&v1.Pod{}, handler.EnqueueRequestsFromMapFunc(r.podMapFunc)). - Named("AppWrapper"). + Named(awv1beta2.AppWrapperKind). 
Complete(r) } diff --git a/internal/controller/appwrapper/fixtures_test.go b/internal/controller/appwrapper/fixtures_test.go index f59551b..cf3c853 100644 --- a/internal/controller/appwrapper/fixtures_test.go +++ b/internal/controller/appwrapper/fixtures_test.go @@ -49,7 +49,7 @@ func randName(baseName string) string { func toAppWrapper(components ...awv1beta2.AppWrapperComponent) *awv1beta2.AppWrapper { return &awv1beta2.AppWrapper{ - TypeMeta: metav1.TypeMeta{APIVersion: awv1beta2.GroupVersion.String(), Kind: "AppWrapper"}, + TypeMeta: metav1.TypeMeta{APIVersion: awv1beta2.GroupVersion.String(), Kind: awv1beta2.AppWrapperKind}, ObjectMeta: metav1.ObjectMeta{Name: randName("aw"), Namespace: "default"}, Spec: awv1beta2.AppWrapperSpec{Components: components}, } diff --git a/internal/controller/workload/workload_controller.go b/internal/controller/workload/workload_controller.go deleted file mode 100644 index a81dba5..0000000 --- a/internal/controller/workload/workload_controller.go +++ /dev/null @@ -1,132 +0,0 @@ -/* -Copyright 2024 IBM Corporation. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package workload - -import ( - "fmt" - - "k8s.io/apimachinery/pkg/api/meta" - "k8s.io/apimachinery/pkg/runtime/schema" - - "sigs.k8s.io/controller-runtime/pkg/builder" - "sigs.k8s.io/controller-runtime/pkg/client" - - kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" - "sigs.k8s.io/kueue/pkg/controller/jobframework" - "sigs.k8s.io/kueue/pkg/podset" - - awv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" - "github.com/project-codeflare/appwrapper/pkg/utils" -) - -// +kubebuilder:rbac:groups=scheduling.k8s.io,resources=priorityclasses,verbs=list;get;watch -// +kubebuilder:rbac:groups="",resources=events,verbs=create;watch;update;patch -// +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=workloads,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=workloads/status,verbs=get;update;patch -// +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=workloads/finalizers,verbs=update -// +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=resourceflavors,verbs=get;list;watch -// +kubebuilder:rbac:groups=kueue.x-k8s.io,resources=workloadpriorityclasses,verbs=get;list;watch - -type AppWrapper awv1beta2.AppWrapper - -var ( - GVK = awv1beta2.GroupVersion.WithKind("AppWrapper") - WorkloadReconciler = jobframework.NewGenericReconcilerFactory( - func() jobframework.GenericJob { return &AppWrapper{} }, - func(b *builder.Builder, c client.Client) *builder.Builder { - return b.Named("AppWrapperWorkload") - }, - ) -) - -func (aw *AppWrapper) Object() client.Object { - return (*awv1beta2.AppWrapper)(aw) -} - -func (aw *AppWrapper) IsSuspended() bool { - return aw.Spec.Suspend -} - -func (aw *AppWrapper) IsActive() bool { - return meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.QuotaReserved)) -} - -func (aw *AppWrapper) Suspend() { - aw.Spec.Suspend = true -} - -func (aw *AppWrapper) GVK() schema.GroupVersionKind { - return GVK -} - -func (aw *AppWrapper) PodSets() []kueue.PodSet { - podSpecTemplates, 
awPodSets, err := utils.GetComponentPodSpecs((*awv1beta2.AppWrapper)(aw)) - if err != nil { - // Kueue will raise an error on zero length PodSet; the Kueue GenericJob API prevents propagating the actual error. - return []kueue.PodSet{} - } - podSets := []kueue.PodSet{} - for psIndex := range podSpecTemplates { - podSets = append(podSets, kueue.PodSet{ - Name: fmt.Sprintf("%s-%v", aw.Name, psIndex), - Template: *podSpecTemplates[psIndex], - Count: utils.Replicas(awPodSets[psIndex]), - TopologyRequest: jobframework.PodSetTopologyRequest(&(podSpecTemplates[psIndex].ObjectMeta), nil, nil, nil), - }) - } - return podSets -} - -func (aw *AppWrapper) RunWithPodSetsInfo(podSetsInfo []podset.PodSetInfo) error { - awPodSetsInfo := make([]awv1beta2.AppWrapperPodSetInfo, len(podSetsInfo)) - for idx := range podSetsInfo { - awPodSetsInfo[idx].Annotations = podSetsInfo[idx].Annotations - awPodSetsInfo[idx].Labels = podSetsInfo[idx].Labels - awPodSetsInfo[idx].NodeSelector = podSetsInfo[idx].NodeSelector - awPodSetsInfo[idx].Tolerations = podSetsInfo[idx].Tolerations - awPodSetsInfo[idx].SchedulingGates = podSetsInfo[idx].SchedulingGates - } - - if err := utils.SetPodSetInfos((*awv1beta2.AppWrapper)(aw), awPodSetsInfo); err != nil { - return fmt.Errorf("%w: %v", podset.ErrInvalidPodsetInfo, err) - } - aw.Spec.Suspend = false - return nil -} - -func (aw *AppWrapper) RestorePodSetsInfo(podSetsInfo []podset.PodSetInfo) bool { - return utils.ClearPodSetInfos((*awv1beta2.AppWrapper)(aw)) -} - -func (aw *AppWrapper) Finished() (message string, success, finished bool) { - switch aw.Status.Phase { - case awv1beta2.AppWrapperSucceeded: - return "AppWrapper finished successfully", true, true - - case awv1beta2.AppWrapperFailed: - if meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.ResourcesDeployed)) { - return "Still deleting resources for failed AppWrapper", false, false - } else { - return "AppWrapper failed", false, true - } - } - return "", false, false -} - -func (aw 
*AppWrapper) PodsReady() bool { - return meta.IsStatusConditionTrue(aw.Status.Conditions, string(awv1beta2.PodsReady)) -} diff --git a/internal/webhook/appwrapper_fixtures_test.go b/internal/webhook/appwrapper_fixtures_test.go index 586d102..4330ffa 100644 --- a/internal/webhook/appwrapper_fixtures_test.go +++ b/internal/webhook/appwrapper_fixtures_test.go @@ -45,7 +45,7 @@ func randName(baseName string) string { func toAppWrapper(components ...awv1beta2.AppWrapperComponent) *awv1beta2.AppWrapper { return &awv1beta2.AppWrapper{ - TypeMeta: metav1.TypeMeta{APIVersion: awv1beta2.GroupVersion.String(), Kind: "AppWrapper"}, + TypeMeta: metav1.TypeMeta{APIVersion: awv1beta2.GroupVersion.String(), Kind: awv1beta2.AppWrapperKind}, ObjectMeta: metav1.ObjectMeta{Name: randName("aw"), Namespace: "default"}, Spec: awv1beta2.AppWrapperSpec{Components: components}, } diff --git a/internal/webhook/appwrapper_webhook.go b/internal/webhook/appwrapper_webhook.go index 68800b5..4253166 100644 --- a/internal/webhook/appwrapper_webhook.go +++ b/internal/webhook/appwrapper_webhook.go @@ -24,7 +24,6 @@ import ( authv1 "k8s.io/api/authorization/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/util/validation/field" @@ -37,10 +36,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/webhook" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" - "sigs.k8s.io/kueue/pkg/controller/jobframework" awv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" - wlc "github.com/project-codeflare/appwrapper/internal/controller/workload" utilmaps "github.com/project-codeflare/appwrapper/internal/util" "github.com/project-codeflare/appwrapper/pkg/config" "github.com/project-codeflare/appwrapper/pkg/utils" @@ -52,6 +49,10 @@ const ( QueueNameLabel = 
"kueue.x-k8s.io/queue-name" ) +var ( + awgvk = awv1beta2.GroupVersion.WithKind(awv1beta2.AppWrapperKind) +) + type rbacACSupport struct { discoveryClient *discovery.DiscoveryClient subjectAccessReviewer authClientv1.SubjectAccessReviewInterface @@ -59,12 +60,9 @@ type rbacACSupport struct { } type appWrapperWebhook struct { - client client.Client - defaultQueueName string - enableKueueIntegrations bool - manageJobsWithoutQueueName bool - managedJobsNamespaceSelector labels.Selector - userRBACAdmissionCheck bool + client client.Client + defaultQueueName string + userRBACAdmissionCheck bool // support for userRBACAdmissionCheck; will be nil if it is not enabled rbacACSupport *rbacACSupport @@ -82,15 +80,9 @@ func (w *appWrapperWebhook) Default(ctx context.Context, obj runtime.Object) err aw := obj.(*awv1beta2.AppWrapper) log.FromContext(ctx).V(2).Info("Applying defaults", "job", aw) - // Queue name and Suspend - if w.enableKueueIntegrations { - if w.defaultQueueName != "" { - aw.Labels = utilmaps.MergeKeepFirst(aw.Labels, map[string]string{QueueNameLabel: w.defaultQueueName}) - } - err := jobframework.ApplyDefaultForSuspend(ctx, (*wlc.AppWrapper)(aw), w.client, w.manageJobsWithoutQueueName, w.managedJobsNamespaceSelector) - if err != nil { - return err - } + // propagate non-empty default queue name + if w.defaultQueueName != "" { + aw.Labels = utilmaps.MergeKeepFirst(aw.Labels, map[string]string{QueueNameLabel: w.defaultQueueName}) } // inject labels with user name and id @@ -114,9 +106,6 @@ func (w *appWrapperWebhook) ValidateCreate(ctx context.Context, obj runtime.Obje aw := obj.(*awv1beta2.AppWrapper) log.FromContext(ctx).V(2).Info("Validating create", "job", aw) allErrors := w.validateAppWrapperCreate(ctx, aw) - if w.enableKueueIntegrations { - allErrors = append(allErrors, jobframework.ValidateJobOnCreate((*wlc.AppWrapper)(aw))...) 
- } return nil, allErrors.ToAggregate() } @@ -126,9 +115,6 @@ func (w *appWrapperWebhook) ValidateUpdate(ctx context.Context, oldObj, newObj r newAW := newObj.(*awv1beta2.AppWrapper) log.FromContext(ctx).V(2).Info("Validating update", "job", newAW) allErrors := w.validateAppWrapperUpdate(oldAW, newAW) - if w.enableKueueIntegrations { - allErrors = append(allErrors, jobframework.ValidateJobOnUpdate((*wlc.AppWrapper)(oldAW), (*wlc.AppWrapper)(newAW))...) - } return nil, allErrors.ToAggregate() } @@ -167,7 +153,7 @@ func (w *appWrapperWebhook) validateAppWrapperCreate(ctx context.Context, aw *aw } // 1. Deny nested AppWrappers - if *gvk == wlc.GVK { + if *gvk == awgvk { allErrors = append(allErrors, field.Forbidden(compPath.Child("template"), "Nested AppWrappers are forbidden")) } @@ -311,17 +297,10 @@ func (w *appWrapperWebhook) lookupResource(gvk *schema.GroupVersionKind) string } func SetupAppWrapperWebhook(mgr ctrl.Manager, awConfig *config.AppWrapperConfig) error { - nsSelector, err := metav1.LabelSelectorAsSelector(awConfig.KueueJobReconciller.ManageJobsNamespaceSelector) - if err != nil { - return err - } wh := &appWrapperWebhook{ - client: mgr.GetClient(), - defaultQueueName: awConfig.DefaultQueueName, - enableKueueIntegrations: awConfig.EnableKueueIntegrations, - manageJobsWithoutQueueName: awConfig.KueueJobReconciller.ManageJobsWithoutQueueName, - managedJobsNamespaceSelector: nsSelector, - userRBACAdmissionCheck: awConfig.UserRBACAdmissionCheck, + client: mgr.GetClient(), + defaultQueueName: awConfig.DefaultQueueName, + userRBACAdmissionCheck: awConfig.UserRBACAdmissionCheck, } if awConfig.UserRBACAdmissionCheck { diff --git a/internal/webhook/appwrapper_webhook_test.go b/internal/webhook/appwrapper_webhook_test.go index cf0cd98..37b6a4f 100644 --- a/internal/webhook/appwrapper_webhook_test.go +++ b/internal/webhook/appwrapper_webhook_test.go @@ -32,14 +32,6 @@ import ( var _ = Describe("AppWrapper Webhook Tests", func() { Context("Defaulting Webhook", 
func() { - It("Suspended is set to true", func() { - aw := toAppWrapper(pod(100)) - - Expect(k8sClient.Create(ctx, aw)).To(Succeed()) - Expect(aw.Spec.Suspend).Should(BeTrue(), "aw.Spec.Suspend should have been changed to true") - Expect(k8sClient.Delete(ctx, aw)).To(Succeed()) - }) - It("Default queue name is set", func() { aw := toAppWrapper(pod(100)) @@ -244,7 +236,6 @@ var _ = Describe("AppWrapper Webhook Tests", func() { aw := toAppWrapper(pod(100), deployment(1, 100), namespacedPod("default", 100), rayCluster(1, 100), jobSet(1, 100)) Expect(k8sClient.Create(ctx, aw)).To(Succeed(), "Legal AppWrappers should be accepted") - Expect(aw.Spec.Suspend).Should(BeTrue()) Expect(k8sClient.Delete(ctx, aw)).To(Succeed()) }) @@ -254,7 +245,6 @@ var _ = Describe("AppWrapper Webhook Tests", func() { jobForInference(2, 4, 100), jobForInference(8, 4, 100)) Expect(k8sClient.Create(ctx, aw)).To(Succeed(), "PodSets should be inferred") - Expect(aw.Spec.Suspend).Should(BeTrue()) Expect(k8sClient.Delete(ctx, aw)).To(Succeed()) }) @@ -262,7 +252,6 @@ var _ = Describe("AppWrapper Webhook Tests", func() { aw := toAppWrapper(pytorchJobForInference(100, 4, 100), rayClusterForInference(7, 100), rayJobForInference(7, 100)) Expect(k8sClient.Create(ctx, aw)).To(Succeed(), "PodSets should be inferred") - Expect(aw.Spec.Suspend).Should(BeTrue()) Expect(k8sClient.Delete(ctx, aw)).To(Succeed()) }) }) diff --git a/internal/webhook/suite_test.go b/internal/webhook/suite_test.go index db1ae23..07a5655 100644 --- a/internal/webhook/suite_test.go +++ b/internal/webhook/suite_test.go @@ -47,7 +47,6 @@ import ( awv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" "github.com/project-codeflare/appwrapper/pkg/config" - kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" ) // These tests use Ginkgo (BDD-style Go testing framework). 
Refer to @@ -110,8 +109,6 @@ var _ = BeforeSuite(func() { Expect(err).NotTo(HaveOccurred()) err = clientgoscheme.AddToScheme(scheme) Expect(err).NotTo(HaveOccurred()) - err = kueue.AddToScheme(scheme) - Expect(err).NotTo(HaveOccurred()) //+kubebuilder:scaffold:scheme diff --git a/pkg/config/config.go b/pkg/config/config.go index 1301e2d..82e4531 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -21,9 +21,7 @@ import ( "time" v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/utils/ptr" - "sigs.k8s.io/kueue/apis/config/v1beta1" ) type OperatorConfig struct { @@ -34,21 +32,12 @@ type OperatorConfig struct { } type AppWrapperConfig struct { - EnableKueueIntegrations bool `json:"enableKueueIntegrations,omitempty"` - KueueJobReconciller *KueueJobReconcillerConfig `json:"kueueJobReconciller,omitempty"` - Autopilot *AutopilotConfig `json:"autopilot,omitempty"` - UserRBACAdmissionCheck bool `json:"userRBACAdmissionCheck,omitempty"` - FaultTolerance *FaultToleranceConfig `json:"faultTolerance,omitempty"` - SchedulerName string `json:"schedulerName,omitempty"` - DefaultQueueName string `json:"defaultQueueName,omitempty"` - SlackQueueName string `json:"slackQueueName,omitempty"` -} - -type KueueJobReconcillerConfig struct { - ManageJobsWithoutQueueName bool `json:"manageJobsWithoutQueueName,omitempty"` - ManageJobsNamespaceSelector *metav1.LabelSelector `json:"manageJobsNamespaceSelector,omitempty"` - WaitForPodsReady *v1beta1.WaitForPodsReady `json:"waitForPodsReady,omitempty"` - LabelKeysToCopy []string `json:"labelKeysToCopy,omitempty"` + Autopilot *AutopilotConfig `json:"autopilot,omitempty"` + UserRBACAdmissionCheck bool `json:"userRBACAdmissionCheck,omitempty"` + FaultTolerance *FaultToleranceConfig `json:"faultTolerance,omitempty"` + SchedulerName string `json:"schedulerName,omitempty"` + DefaultQueueName string `json:"defaultQueueName,omitempty"` + SlackQueueName string `json:"slackQueueName,omitempty"` } type AutopilotConfig 
struct { @@ -98,21 +87,6 @@ type HealthConfiguration struct { // NewAppWrapperConfig constructs an AppWrapperConfig and fills in default values func NewAppWrapperConfig() *AppWrapperConfig { return &AppWrapperConfig{ - EnableKueueIntegrations: true, - KueueJobReconciller: &KueueJobReconcillerConfig{ - ManageJobsWithoutQueueName: true, - ManageJobsNamespaceSelector: &metav1.LabelSelector{ - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: "kubernetes.io/metadata.name", - Operator: metav1.LabelSelectorOpNotIn, - Values: []string{"kube-system", "kueue-system", "appwrapper-system"}, - }, - }, - }, - WaitForPodsReady: &v1beta1.WaitForPodsReady{Enable: true}, - LabelKeysToCopy: []string{}, - }, Autopilot: &AutopilotConfig{ InjectAntiAffinities: true, MonitorNodes: true, diff --git a/pkg/controller/setup.go b/pkg/controller/setup.go index 011ffc9..d1c38f7 100644 --- a/pkg/controller/setup.go +++ b/pkg/controller/setup.go @@ -30,27 +30,12 @@ import ( cert "github.com/open-policy-agent/cert-controller/pkg/rotator" "github.com/project-codeflare/appwrapper/internal/controller/appwrapper" - "github.com/project-codeflare/appwrapper/internal/controller/workload" "github.com/project-codeflare/appwrapper/internal/webhook" "github.com/project-codeflare/appwrapper/pkg/config" - - "sigs.k8s.io/kueue/pkg/controller/jobframework" ) // SetupControllers creates and configures all components of the AppWrapper controller func SetupControllers(mgr ctrl.Manager, awConfig *config.AppWrapperConfig) error { - if awConfig.EnableKueueIntegrations { - if err := workload.WorkloadReconciler( - mgr.GetClient(), - mgr.GetEventRecorderFor("kueue"), - jobframework.WithManageJobsWithoutQueueName(awConfig.KueueJobReconciller.ManageJobsWithoutQueueName), - jobframework.WithWaitForPodsReady(awConfig.KueueJobReconciller.WaitForPodsReady), - jobframework.WithLabelKeysToCopy(awConfig.KueueJobReconciller.LabelKeysToCopy), - ).SetupWithManager(mgr); err != nil { - return fmt.Errorf("workload 
controller: %w", err) - } - } - if awConfig.Autopilot != nil && awConfig.Autopilot.MonitorNodes { conduit := make(chan event.GenericEvent, 1) if err := (&appwrapper.NodeHealthMonitor{ @@ -92,11 +77,6 @@ func SetupWebhooks(mgr ctrl.Manager, awConfig *config.AppWrapperConfig) error { } func SetupIndexers(ctx context.Context, mgr ctrl.Manager, awConfig *config.AppWrapperConfig) error { - if awConfig.EnableKueueIntegrations { - if err := jobframework.SetupWorkloadOwnerIndex(ctx, mgr.GetFieldIndexer(), workload.GVK); err != nil { - return fmt.Errorf("workload indexer: %w", err) - } - } return nil } diff --git a/test/e2e/util_test.go b/test/e2e/util_test.go index 50ee3c0..d14d733 100644 --- a/test/e2e/util_test.go +++ b/test/e2e/util_test.go @@ -177,7 +177,7 @@ func createAppWrapper(ctx context.Context, components ...awv1beta2.AppWrapperCom func toAppWrapper(components ...awv1beta2.AppWrapperComponent) *awv1beta2.AppWrapper { return &awv1beta2.AppWrapper{ - TypeMeta: metav1.TypeMeta{APIVersion: awv1beta2.GroupVersion.String(), Kind: "AppWrapper"}, + TypeMeta: metav1.TypeMeta{APIVersion: awv1beta2.GroupVersion.String(), Kind: awv1beta2.AppWrapperKind}, ObjectMeta: metav1.ObjectMeta{ Name: randName("aw"), Namespace: testNamespace, From f459b91d84437b741fa4e230fc5376a743da6679 Mon Sep 17 00:00:00 2001 From: David Grove Date: Mon, 10 Mar 2025 10:24:46 -0400 Subject: [PATCH 2/7] partially update hack/kueue-config for kueue 0.11 --- .../controller_manager_config.yaml | 28 +++++++++---------- hack/kueue-config/kustomization.yaml | 15 ---------- 2 files changed, 13 insertions(+), 30 deletions(-) diff --git a/hack/kueue-config/controller_manager_config.yaml b/hack/kueue-config/controller_manager_config.yaml index 059bf50..107a73c 100644 --- a/hack/kueue-config/controller_manager_config.yaml +++ b/hack/kueue-config/controller_manager_config.yaml @@ -3,7 +3,7 @@ kind: Configuration health: healthProbeBindAddress: :8081 metrics: - bindAddress: :8080 + bindAddress: :8443 # 
enableClusterQueueResources: true webhook: port: 9443 @@ -26,6 +26,7 @@ clientConnection: #waitForPodsReady: # enable: false # timeout: 5m +# recoveryTimeout: 3m # blockAdmission: false # requeuingStrategy: # timestamp: Eviction @@ -34,8 +35,10 @@ clientConnection: # backoffMaxSeconds: 3600 manageJobsWithoutQueueName: true #managedJobsNamespaceSelector: -# matchLabels: -# kueue-managed: "true" +# matchExpressions: +# - key: kubernetes.io/metadata.name +# operator: NotIn +# values: [ kube-system, kueue-system ] #internalCertManagement: # enable: false # webhookServiceName: "" @@ -47,22 +50,17 @@ integrations: - "ray.io/rayjob" - "ray.io/raycluster" - "jobset.x-k8s.io/jobset" - - "kubeflow.org/mxjob" - "kubeflow.org/paddlejob" - "kubeflow.org/pytorchjob" - "kubeflow.org/tfjob" - "kubeflow.org/xgboostjob" - # - "pod" - # - "deployment" # requires enabling pod integration - # - "statefulset" # requires enabling pod integration - externalFrameworks: - - "AppWrapper.v1beta2.workload.codeflare.dev" -# podOptions: -# namespaceSelector: -# matchExpressions: -# - key: kubernetes.io/metadata.name -# operator: NotIn -# values: [ kube-system, kueue-system ] + - "workload.codeflare.dev/appwrapper" +# - "pod" +# - "deployment" # requires enabling pod integration +# - "statefulset" # requires enabling pod integration +# - "leaderworkerset.x-k8s.io/leaderworkerset" # requires enabling pod integration +# externalFrameworks: +# - "Foo.v1.example.com" #fairSharing: # enable: true # preemptionStrategies: [LessThanOrEqualToFinalShare, LessThanInitialShare] diff --git a/hack/kueue-config/kustomization.yaml b/hack/kueue-config/kustomization.yaml index 5ecd9f1..8d3f6e1 100644 --- a/hack/kueue-config/kustomization.yaml +++ b/hack/kueue-config/kustomization.yaml @@ -16,21 +16,6 @@ images: newTag: v0.10.2 patches: -- target: - kind: ClusterRole - name: manager-role - patch: | - - op: add - path: /rules/- - value: - apiGroups: - - workload.codeflare.dev - resources: - - appwrappers - verbs: - 
- get - - list - - watch - target: kind: Deployment name: controller-manager From 10c72927abf4569b6768ceed5cd46d19cbab6b40 Mon Sep 17 00:00:00 2001 From: David Grove Date: Mon, 10 Mar 2025 10:46:42 -0400 Subject: [PATCH 3/7] remove slack cluster queue functionality --- Makefile | 2 - cmd/main.go | 3 - config/rbac/role.yaml | 10 -- .../controller/appwrapper/fixtures_test.go | 14 -- .../appwrapper/node_health_monitor.go | 17 --- .../appwrapper/node_health_monitor_test.go | 122 ---------------- .../controller/appwrapper/slackcq_monitor.go | 132 ------------------ internal/controller/appwrapper/suite_test.go | 5 +- pkg/config/config.go | 1 - pkg/controller/setup.go | 12 -- test/e2e/appwrapper_test.go | 5 +- 11 files changed, 2 insertions(+), 321 deletions(-) delete mode 100644 internal/controller/appwrapper/slackcq_monitor.go diff --git a/Makefile b/Makefile index 08818d8..0e8c877 100644 --- a/Makefile +++ b/Makefile @@ -99,8 +99,6 @@ EXTERNAL_CRDS_DIR ?= $(shell pwd)/dep-crds KUEUE_ROOT = $(shell go list -m -mod=readonly -f "{{.Dir}}" sigs.k8s.io/kueue) .PHONY: dep-crds dep-crds: ## Copy CRDs from external operators to dep-crds directory. - mkdir -p $(EXTERNAL_CRDS_DIR)/kueue - cp -f $(KUEUE_ROOT)/config/components/crd/bases/* $(EXTERNAL_CRDS_DIR)/kueue .PHONY: test test: manifests generate fmt vet dep-crds envtest ## Run unit tests. 
diff --git a/cmd/main.go b/cmd/main.go index 5ecbe8f..5b5a426 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -47,8 +47,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/webhook" "sigs.k8s.io/yaml" - kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" - awv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" "github.com/project-codeflare/appwrapper/internal/metrics" "github.com/project-codeflare/appwrapper/pkg/config" @@ -66,7 +64,6 @@ var ( func init() { utilruntime.Must(clientgoscheme.AddToScheme(scheme)) - utilruntime.Must(kueue.AddToScheme(scheme)) utilruntime.Must(awv1beta2.AddToScheme(scheme)) //+kubebuilder:scaffold:scheme } diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 54d6a5a..591868c 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -105,16 +105,6 @@ rules: - patch - update - watch -- apiGroups: - - kueue.x-k8s.io - resources: - - clusterqueues - verbs: - - get - - list - - patch - - update - - watch - apiGroups: - ray.io resources: diff --git a/internal/controller/appwrapper/fixtures_test.go b/internal/controller/appwrapper/fixtures_test.go index cf3c853..8e6e900 100644 --- a/internal/controller/appwrapper/fixtures_test.go +++ b/internal/controller/appwrapper/fixtures_test.go @@ -30,7 +30,6 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" - kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" "sigs.k8s.io/yaml" awv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" @@ -217,16 +216,3 @@ func malformedPod(milliCPU int64) awv1beta2.AppWrapperComponent { Template: runtime.RawExtension{Raw: jsonBytes}, } } - -func slackQueue(queueName string, nominalQuota resource.Quantity) *kueue.ClusterQueue { - return &kueue.ClusterQueue{ - TypeMeta: metav1.TypeMeta{APIVersion: kueue.GroupVersion.String(), Kind: "ClusterQueue"}, - ObjectMeta: metav1.ObjectMeta{Name: queueName}, - Spec: kueue.ClusterQueueSpec{ - ResourceGroups: []kueue.ResourceGroup{{ - CoveredResources: 
[]v1.ResourceName{v1.ResourceName("nvidia.com/gpu")}, - Flavors: []kueue.FlavorQuotas{{ - Name: "default-flavor", - Resources: []kueue.ResourceQuota{{Name: v1.ResourceName("nvidia.com/gpu"), NominalQuota: nominalQuota}}}}}}}, - } -} diff --git a/internal/controller/appwrapper/node_health_monitor.go b/internal/controller/appwrapper/node_health_monitor.go index 78af0a3..76fce24 100644 --- a/internal/controller/appwrapper/node_health_monitor.go +++ b/internal/controller/appwrapper/node_health_monitor.go @@ -24,12 +24,10 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/sets" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/log" @@ -44,7 +42,6 @@ import ( type NodeHealthMonitor struct { client.Client Config *config.AppWrapperConfig - Events chan event.GenericEvent // event channel for NodeHealthMonitor to trigger SlackClusterQueueMonitor } var ( @@ -85,16 +82,6 @@ func (r *NodeHealthMonitor) Reconcile(ctx context.Context, req ctrl.Request) (ct return ctrl.Result{}, nil } -func (r *NodeHealthMonitor) triggerSlackCQMonitor() { - if r.Config.SlackQueueName != "" { - select { - case r.Events <- event.GenericEvent{Object: &metav1.PartialObjectMetadata{ObjectMeta: metav1.ObjectMeta{Name: r.Config.SlackQueueName}}}: - default: - // do not block if event is already in channel - } - } -} - // update noExecuteNodes and noScheduleNodes for the deletion of nodeName func (r *NodeHealthMonitor) updateForNodeDeletion(ctx context.Context, nodeName string) { if _, ok := noExecuteNodes[nodeName]; ok { @@ -103,7 +90,6 @@ func (r *NodeHealthMonitor) updateForNodeDeletion(ctx context.Context, nodeName noExecuteNodesMutex.Unlock() // END CRITICAL SECTION log.FromContext(ctx).Info("Updated 
NoExecute information due to Node deletion", "Number NoExecute Nodes", len(noExecuteNodes), "NoExecute Resource Details", noExecuteNodes) - r.triggerSlackCQMonitor() } if _, ok := noScheduleNodes[nodeName]; ok { noScheduleNodesMutex.Lock() // BEGIN CRITICAL SECTION @@ -111,7 +97,6 @@ func (r *NodeHealthMonitor) updateForNodeDeletion(ctx context.Context, nodeName noScheduleNodesMutex.Unlock() // END CRITICAL SECTION log.FromContext(ctx).Info("Updated NoSchedule information due to Node deletion", "Number NoSchedule Nodes", len(noScheduleNodes), "NoSchedule Resource Details", noScheduleNodes) - r.triggerSlackCQMonitor() } } @@ -146,7 +131,6 @@ func (r *NodeHealthMonitor) updateNoExecuteNodes(ctx context.Context, node *v1.N if noExecuteNodesChanged { log.FromContext(ctx).Info("Updated NoExecute information", "Number NoExecute Nodes", len(noExecuteNodes), "NoExecute Resource Details", noExecuteNodes) - r.triggerSlackCQMonitor() } } @@ -192,7 +176,6 @@ func (r *NodeHealthMonitor) updateNoScheduleNodes(ctx context.Context, node *v1. 
if noScheduleNodesChanged { log.FromContext(ctx).Info("Updated NoSchedule information", "Number NoSchedule Nodes", len(noScheduleNodes), "NoSchedule Resource Details", noScheduleNodes) - r.triggerSlackCQMonitor() } } diff --git a/internal/controller/appwrapper/node_health_monitor_test.go b/internal/controller/appwrapper/node_health_monitor_test.go index debdbea..744c59b 100644 --- a/internal/controller/appwrapper/node_health_monitor_test.go +++ b/internal/controller/appwrapper/node_health_monitor_test.go @@ -24,17 +24,13 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" - "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/reconcile" ) var _ = Describe("NodeMonitor Controller", func() { - var slackQueueName = "fake-queue" - var dispatch = types.NamespacedName{Name: slackQueueName} var node1Name = types.NamespacedName{Name: "fake-node-1"} var node2Name = types.NamespacedName{Name: "fake-node-2"} var nodeMonitor *NodeHealthMonitor - var cqMonitor *SlackClusterQueueMonitor nodeGPUs := v1.ResourceList{v1.ResourceName("nvidia.com/gpu"): resource.MustParse("4")} createNode := func(nodeName string) { @@ -58,23 +54,14 @@ var _ = Describe("NodeMonitor Controller", func() { BeforeEach(func() { // Create reconcillers awConfig := config.NewAppWrapperConfig() - awConfig.SlackQueueName = slackQueueName - conduit := make(chan event.GenericEvent, 1) nodeMonitor = &NodeHealthMonitor{ Client: k8sClient, Config: awConfig, - Events: conduit, - } - cqMonitor = &SlackClusterQueueMonitor{ - Client: k8sClient, - Config: awConfig, - Events: conduit, } }) AfterEach(func() { nodeMonitor = nil - cqMonitor = nil }) It("Autopilot Monitoring", func() { @@ -120,113 +107,4 @@ var _ = Describe("NodeMonitor Controller", func() { deleteNode(node1Name.Name) deleteNode(node2Name.Name) }) - - It("ClusterQueue Lending Adjustment", func() { - createNode(node1Name.Name) - createNode(node2Name.Name) - - 
_, err := nodeMonitor.Reconcile(ctx, reconcile.Request{NamespacedName: node1Name}) - Expect(err).NotTo(HaveOccurred()) - _, err = nodeMonitor.Reconcile(ctx, reconcile.Request{NamespacedName: node2Name}) - Expect(err).NotTo(HaveOccurred()) - - // start with 6 gpus - queue := slackQueue(slackQueueName, resource.MustParse("6")) - Expect(k8sClient.Create(ctx, queue)).To(Succeed()) - - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: slackQueueName}, queue)).Should(Succeed()) - Expect(queue.Spec.ResourceGroups[0].Flavors[0].Resources[0].LendingLimit).Should(BeNil()) - - // remove 4 gpus, lending limit should be 2 - node1 := getNode(node1Name.Name) - node1.Labels["autopilot.ibm.com/gpuhealth"] = "EVICT" - Expect(k8sClient.Update(ctx, node1)).Should(Succeed()) - _, err = nodeMonitor.Reconcile(ctx, reconcile.Request{NamespacedName: node1Name}) - Expect(err).NotTo(HaveOccurred()) - _, err = cqMonitor.Reconcile(ctx, reconcile.Request{NamespacedName: dispatch}) - Expect(err).NotTo(HaveOccurred()) - - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: slackQueueName}, queue)).Should(Succeed()) - Expect(queue.Spec.ResourceGroups[0].Flavors[0].Resources[0].LendingLimit.Value()).Should(Equal(int64(2))) - - // remove another 4 gpus, lending limit should be 0 = max(0, 6-4-4) - node2 := getNode(node2Name.Name) - node2.Labels["autopilot.ibm.com/gpuhealth"] = "TESTING" - Expect(k8sClient.Update(ctx, node2)).Should(Succeed()) - _, err = nodeMonitor.Reconcile(ctx, reconcile.Request{NamespacedName: node2Name}) - Expect(err).NotTo(HaveOccurred()) - _, err = cqMonitor.Reconcile(ctx, reconcile.Request{NamespacedName: dispatch}) - Expect(err).NotTo(HaveOccurred()) - - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: slackQueueName}, queue)).Should(Succeed()) - Expect(queue.Spec.ResourceGroups[0].Flavors[0].Resources[0].LendingLimit).ShouldNot(BeNil()) - Expect(queue.Spec.ResourceGroups[0].Flavors[0].Resources[0].LendingLimit.Value()).Should(Equal(int64(0))) - - // restore 4 gpus, 
lending limit should be 2 - node1.Labels["autopilot.ibm.com/gpuhealth"] = "OK" - Expect(k8sClient.Update(ctx, node1)).Should(Succeed()) - _, err = nodeMonitor.Reconcile(ctx, reconcile.Request{NamespacedName: node1Name}) - Expect(err).NotTo(HaveOccurred()) - _, err = cqMonitor.Reconcile(ctx, reconcile.Request{NamespacedName: dispatch}) - Expect(err).NotTo(HaveOccurred()) - - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: slackQueueName}, queue)).Should(Succeed()) - Expect(queue.Spec.ResourceGroups[0].Flavors[0].Resources[0].LendingLimit).ShouldNot(BeNil()) - Expect(queue.Spec.ResourceGroups[0].Flavors[0].Resources[0].LendingLimit.Value()).Should(Equal(int64(2))) - - // restore last 4 gpus, lending limit should be nil - node2.Labels["autopilot.ibm.com/gpuhealth"] = "OK" - Expect(k8sClient.Update(ctx, node2)).Should(Succeed()) - _, err = nodeMonitor.Reconcile(ctx, reconcile.Request{NamespacedName: node2Name}) - Expect(err).NotTo(HaveOccurred()) - _, err = cqMonitor.Reconcile(ctx, reconcile.Request{NamespacedName: dispatch}) - Expect(err).NotTo(HaveOccurred()) - - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: slackQueueName}, queue)).Should(Succeed()) - Expect(queue.Spec.ResourceGroups[0].Flavors[0].Resources[0].LendingLimit).Should(BeNil()) - - // cordon node1, lending limit should be 2 - node1 = getNode(node1Name.Name) - node1.Spec.Unschedulable = true - Expect(k8sClient.Update(ctx, node1)).Should(Succeed()) - _, err = nodeMonitor.Reconcile(ctx, reconcile.Request{NamespacedName: node1Name}) - Expect(err).NotTo(HaveOccurred()) - _, err = cqMonitor.Reconcile(ctx, reconcile.Request{NamespacedName: dispatch}) - Expect(err).NotTo(HaveOccurred()) - - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: slackQueueName}, queue)).Should(Succeed()) - Expect(queue.Spec.ResourceGroups[0].Flavors[0].Resources[0].LendingLimit.Value()).Should(Equal(int64(2))) - - // Increase the slack cluster queue's quota by 2 and expect LendngLimit to increase by 2 to become 4 - 
Expect(k8sClient.Get(ctx, types.NamespacedName{Name: slackQueueName}, queue)).Should(Succeed()) - queue.Spec.ResourceGroups[0].Flavors[0].Resources[0].NominalQuota = resource.MustParse("8") - Expect(k8sClient.Update(ctx, queue)).Should(Succeed()) - _, err = cqMonitor.Reconcile(ctx, reconcile.Request{NamespacedName: types.NamespacedName{Name: slackQueueName}}) - Expect(err).NotTo(HaveOccurred()) - - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: slackQueueName}, queue)).Should(Succeed()) - Expect(queue.Spec.ResourceGroups[0].Flavors[0].Resources[0].LendingLimit.Value()).Should(Equal(int64(4))) - - // Deleting a noncordoned node should not change the lending limit - deleteNode(node2Name.Name) - _, err = nodeMonitor.Reconcile(ctx, reconcile.Request{NamespacedName: node2Name}) - Expect(err).NotTo(HaveOccurred()) - _, err = cqMonitor.Reconcile(ctx, reconcile.Request{NamespacedName: dispatch}) - Expect(err).NotTo(HaveOccurred()) - - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: slackQueueName}, queue)).Should(Succeed()) - Expect(queue.Spec.ResourceGroups[0].Flavors[0].Resources[0].LendingLimit.Value()).Should(Equal(int64(4))) - - // Delete the cordoned node; lending limit should now by nil - deleteNode(node1Name.Name) - _, err = nodeMonitor.Reconcile(ctx, reconcile.Request{NamespacedName: node1Name}) - Expect(err).NotTo(HaveOccurred()) - _, err = cqMonitor.Reconcile(ctx, reconcile.Request{NamespacedName: dispatch}) - Expect(err).NotTo(HaveOccurred()) - - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: slackQueueName}, queue)).Should(Succeed()) - Expect(queue.Spec.ResourceGroups[0].Flavors[0].Resources[0].LendingLimit).Should(BeNil()) - - Expect(k8sClient.Delete(ctx, queue)).To(Succeed()) - }) }) diff --git a/internal/controller/appwrapper/slackcq_monitor.go b/internal/controller/appwrapper/slackcq_monitor.go deleted file mode 100644 index df9b482..0000000 --- a/internal/controller/appwrapper/slackcq_monitor.go +++ /dev/null @@ -1,132 +0,0 @@ -/* 
-Copyright 2024 IBM Corporation. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package appwrapper - -import ( - "context" - - v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/api/resource" - "k8s.io/apimachinery/pkg/types" - "k8s.io/utils/ptr" - - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/event" - "sigs.k8s.io/controller-runtime/pkg/handler" - "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/controller-runtime/pkg/source" - kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" - - "github.com/project-codeflare/appwrapper/pkg/config" -) - -// SlackClusterQueueMonitor uses the information gathered by the NodeHealthMonitor to -// adjust the lending limitLimits of a designated slack ClusterQueue -type SlackClusterQueueMonitor struct { - client.Client - Config *config.AppWrapperConfig - Events chan event.GenericEvent // event channel for NodeHealthMonitor to trigger SlackClusterQueueMonitor -} - -// permission to watch, get and update clusterqueues -//+kubebuilder:rbac:groups=kueue.x-k8s.io,resources=clusterqueues,verbs=get;list;watch;update;patch - -func (r *SlackClusterQueueMonitor) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - if req.Name != r.Config.SlackQueueName { - return ctrl.Result{}, nil - } - - cq := &kueue.ClusterQueue{} - if err := r.Get(ctx, types.NamespacedName{Name: r.Config.SlackQueueName}, cq); err != nil { - if 
errors.IsNotFound(err) { - return ctrl.Result{}, nil // give up if slack cluster queue is not defined - } - return ctrl.Result{}, err - } - - // Compute the total quantities of unschedulable resources - unschedulableQuantities := map[v1.ResourceName]*resource.Quantity{} - noScheduleNodesMutex.RLock() // BEGIN CRITICAL SECTION - for _, quantities := range noScheduleNodes { - for resourceName, quantity := range quantities { - if !quantity.IsZero() { - if unschedulableQuantities[resourceName] == nil { - unschedulableQuantities[resourceName] = ptr.To(quantity) - } else { - unschedulableQuantities[resourceName].Add(quantity) - } - } - } - } - noScheduleNodesMutex.RUnlock() // END CRITICAL SECTION - - // enforce lending limits on 1st flavor of 1st resource group - resources := cq.Spec.ResourceGroups[0].Flavors[0].Resources - delta := map[v1.ResourceName]*resource.Quantity{} - for i, quota := range resources { - var lendingLimit *resource.Quantity - if unschedulableQuantity := unschedulableQuantities[quota.Name]; unschedulableQuantity != nil { - if quota.NominalQuota.Cmp(*unschedulableQuantity) > 0 { - lendingLimit = ptr.To(quota.NominalQuota) - lendingLimit.Sub(*unschedulableQuantity) - } else { - lendingLimit = resource.NewQuantity(0, resource.DecimalSI) - } - } - if quota.LendingLimit == nil && lendingLimit != nil { - delta[quota.Name] = ptr.To(quota.NominalQuota) - delta[quota.Name].Sub(*lendingLimit) - delta[quota.Name].Neg() - resources[i].LendingLimit = lendingLimit - } else if quota.LendingLimit != nil && lendingLimit == nil { - delta[quota.Name] = ptr.To(quota.NominalQuota) - delta[quota.Name].Sub(*quota.LendingLimit) - resources[i].LendingLimit = lendingLimit - } else if quota.LendingLimit != nil && lendingLimit != nil && quota.LendingLimit.Cmp(*lendingLimit) != 0 { - delta[quota.Name] = ptr.To(*quota.LendingLimit) - delta[quota.Name].Sub(*lendingLimit) - delta[quota.Name].Neg() - resources[i].LendingLimit = lendingLimit - } - } - - // update lending limits - if 
len(delta) > 0 { - err := r.Update(ctx, cq) - if err == nil { - log.FromContext(ctx).Info("Updated lending limits", "Changed by", delta, "Updated Resources", resources) - return ctrl.Result{}, nil - } else if errors.IsConflict(err) { - return ctrl.Result{Requeue: true}, nil - } else { - return ctrl.Result{}, err - } - } - - return ctrl.Result{}, nil -} - -// SetupWithManager sets up the controller with the Manager. -func (r *SlackClusterQueueMonitor) SetupWithManager(mgr ctrl.Manager) error { - return ctrl.NewControllerManagedBy(mgr). - Watches(&kueue.ClusterQueue{}, &handler.EnqueueRequestForObject{}). - WatchesRawSource(source.Channel(r.Events, &handler.EnqueueRequestForObject{})). - Named("SlackClusterQueueMonitor"). - Complete(r) -} diff --git a/internal/controller/appwrapper/suite_test.go b/internal/controller/appwrapper/suite_test.go index d332cf4..e979b52 100644 --- a/internal/controller/appwrapper/suite_test.go +++ b/internal/controller/appwrapper/suite_test.go @@ -39,7 +39,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log/zap" awv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" - kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" ) // These tests use Ginkgo (BDD-style Go testing framework). 
Refer to @@ -66,7 +65,7 @@ var _ = BeforeSuite(func() { testEnv = &envtest.Environment{ CRDDirectoryPaths: []string{ filepath.Join("..", "..", "..", "config", "crd", "bases"), - filepath.Join("..", "..", "..", "dep-crds", "kueue")}, + }, ErrorIfCRDPathMissing: true, // The BinaryAssetsDirectory is only required if you want to run the tests directly @@ -94,8 +93,6 @@ var _ = BeforeSuite(func() { Expect(err).NotTo(HaveOccurred()) err = clientgoscheme.AddToScheme(scheme) Expect(err).NotTo(HaveOccurred()) - err = kueue.AddToScheme(scheme) - Expect(err).NotTo(HaveOccurred()) //+kubebuilder:scaffold:scheme diff --git a/pkg/config/config.go b/pkg/config/config.go index 82e4531..63e9314 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -37,7 +37,6 @@ type AppWrapperConfig struct { FaultTolerance *FaultToleranceConfig `json:"faultTolerance,omitempty"` SchedulerName string `json:"schedulerName,omitempty"` DefaultQueueName string `json:"defaultQueueName,omitempty"` - SlackQueueName string `json:"slackQueueName,omitempty"` } type AutopilotConfig struct { diff --git a/pkg/controller/setup.go b/pkg/controller/setup.go index d1c38f7..c1a653c 100644 --- a/pkg/controller/setup.go +++ b/pkg/controller/setup.go @@ -24,7 +24,6 @@ import ( "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/healthz" cert "github.com/open-policy-agent/cert-controller/pkg/rotator" @@ -37,23 +36,12 @@ import ( // SetupControllers creates and configures all components of the AppWrapper controller func SetupControllers(mgr ctrl.Manager, awConfig *config.AppWrapperConfig) error { if awConfig.Autopilot != nil && awConfig.Autopilot.MonitorNodes { - conduit := make(chan event.GenericEvent, 1) if err := (&appwrapper.NodeHealthMonitor{ Client: mgr.GetClient(), Config: awConfig, - Events: conduit, }).SetupWithManager(mgr); err != nil { return fmt.Errorf("node health monitor: %w", err) } - if 
awConfig.SlackQueueName != "" { - if err := (&appwrapper.SlackClusterQueueMonitor{ - Client: mgr.GetClient(), - Config: awConfig, - Events: conduit, - }).SetupWithManager(mgr); err != nil { - return fmt.Errorf("slack cluster queue monitor: %w", err) - } - } } if err := (&appwrapper.AppWrapperReconciler{ diff --git a/test/e2e/appwrapper_test.go b/test/e2e/appwrapper_test.go index a07943a..a1146ad 100644 --- a/test/e2e/appwrapper_test.go +++ b/test/e2e/appwrapper_test.go @@ -108,10 +108,7 @@ var _ = Describe("AppWrapper E2E Test", func() { It("JobSet", func() { aw := createAppWrapper(ctx, jobset(500)) appwrappers = append(appwrappers, aw) - // TODO: Need dev versions of kueue/jobset to get correct handling of ownership - // Once those are released change the test to: - // Expect(waitAWPodsReady(ctx, aw)).Should(Succeed()) - Eventually(AppWrapperPhase(ctx, aw), 15*time.Second).Should(Equal(awv1beta2.AppWrapperResuming)) + Expect(waitAWPodsReady(ctx, aw)).Should(Succeed()) }) }) From e42f99ac457282ca37c534fd6cea0149f9e4ccf7 Mon Sep 17 00:00:00 2001 From: David Grove Date: Mon, 10 Mar 2025 17:49:59 -0400 Subject: [PATCH 4/7] appwrapper will work with a default-configured Kueue 0.11 --- .../controller_manager_config.yaml | 74 ------------------- hack/kueue-config/kustomization.yaml | 6 -- 2 files changed, 80 deletions(-) delete mode 100644 hack/kueue-config/controller_manager_config.yaml diff --git a/hack/kueue-config/controller_manager_config.yaml b/hack/kueue-config/controller_manager_config.yaml deleted file mode 100644 index 107a73c..0000000 --- a/hack/kueue-config/controller_manager_config.yaml +++ /dev/null @@ -1,74 +0,0 @@ -apiVersion: config.kueue.x-k8s.io/v1beta1 -kind: Configuration -health: - healthProbeBindAddress: :8081 -metrics: - bindAddress: :8443 -# enableClusterQueueResources: true -webhook: - port: 9443 -leaderElection: - leaderElect: true - resourceName: c1f6bfd2.kueue.x-k8s.io -controller: - groupKindConcurrency: - Job.batch: 5 - Pod: 5 - 
Workload.kueue.x-k8s.io: 5 - LocalQueue.kueue.x-k8s.io: 1 - Cohort.kueue.x-k8s.io: 1 - ClusterQueue.kueue.x-k8s.io: 1 - ResourceFlavor.kueue.x-k8s.io: 1 -clientConnection: - qps: 50 - burst: 100 -#pprofBindAddress: :8083 -#waitForPodsReady: -# enable: false -# timeout: 5m -# recoveryTimeout: 3m -# blockAdmission: false -# requeuingStrategy: -# timestamp: Eviction -# backoffLimitCount: null # null indicates infinite requeuing -# backoffBaseSeconds: 60 -# backoffMaxSeconds: 3600 -manageJobsWithoutQueueName: true -#managedJobsNamespaceSelector: -# matchExpressions: -# - key: kubernetes.io/metadata.name -# operator: NotIn -# values: [ kube-system, kueue-system ] -#internalCertManagement: -# enable: false -# webhookServiceName: "" -# webhookSecretName: "" -integrations: - frameworks: - - "batch/job" - - "kubeflow.org/mpijob" - - "ray.io/rayjob" - - "ray.io/raycluster" - - "jobset.x-k8s.io/jobset" - - "kubeflow.org/paddlejob" - - "kubeflow.org/pytorchjob" - - "kubeflow.org/tfjob" - - "kubeflow.org/xgboostjob" - - "workload.codeflare.dev/appwrapper" -# - "pod" -# - "deployment" # requires enabling pod integration -# - "statefulset" # requires enabling pod integration -# - "leaderworkerset.x-k8s.io/leaderworkerset" # requires enabling pod integration -# externalFrameworks: -# - "Foo.v1.example.com" -#fairSharing: -# enable: true -# preemptionStrategies: [LessThanOrEqualToFinalShare, LessThanInitialShare] -#resources: -# excludeResourcePrefixes: [] -# transformations: -# - input: nvidia.com/mig-4g.5gb -# strategy: Replace | Retain -# outputs: -# example.com/accelerator-memory: 5Gi -# example.com/accelerator-gpc: 4 diff --git a/hack/kueue-config/kustomization.yaml b/hack/kueue-config/kustomization.yaml index 8d3f6e1..edb66be 100644 --- a/hack/kueue-config/kustomization.yaml +++ b/hack/kueue-config/kustomization.yaml @@ -4,12 +4,6 @@ kind: Kustomization resources: - "https://github.com/kubernetes-sigs/kueue/config/default?ref=v0.10.2" -configMapGenerator: -- name: 
manager-config behavior: replace files: - controller_manager_config.yaml images: - name: us-central1-docker.pkg.dev/k8s-staging-images/kueue/kueue newName: registry.k8s.io/kueue/kueue From 6deeff694c982d70c9b3212d19b63ee8592cb901 Mon Sep 17 00:00:00 2001 From: David Grove Date: Mon, 10 Mar 2025 17:51:33 -0400 Subject: [PATCH 5/7] Update AppWrapper architecture for Kueue 0.11 --- site/_pages/arch-controller.md | 61 +++++++++------------------------- 1 file changed, 15 insertions(+), 46 deletions(-) diff --git a/site/_pages/arch-controller.md b/site/_pages/arch-controller.md index 52451c7..bc6a372 100644 --- a/site/_pages/arch-controller.md +++ b/site/_pages/arch-controller.md @@ -4,55 +4,24 @@ title: "AppWrapper Controllers" classes: wide --- -Kueue has a well-developed pattern for Kueue-enabling a Custom -Resource Definition and its associated operator. Following this pattern -allows the resulting operator to smoothly run alongside the core Kueue -operator. The pattern consists of three main elements: an Admission -Controller, a Workload Controller, and a Framework Controller. +Kueue has a [well-developed pattern](https://kueue.sigs.k8s.io/docs/tasks/dev/integrate_a_custom_job/) +for Kueue-enabling a Custom Resource Definition and its associated operator. -#### Admission Controller +AppWrapper version 1.0.6 and earlier was an *external Kueue integration* +and therefore versions of the AppWrapper controller were closely tied to a matching +Kueue version (Kueue 0.7 through Kueue 0.10). -Kueue requires the definition of an Admission Controller that ensures -that the `.spec.suspend` field of newly created AppWrapper instances is -set to true. We also leverage the Admission Controller to ensure that -the user creating the AppWrapper is entitled to create all wrapped resources -and to validate AppWrapper-specific invariants. +Starting with Kueue 0.11 and AppWrapper version 1.1, AppWrapper became a first-class +*built-in Kueue integration*.
This allows a looser coupling between the +two controllers and a significant simplification in their joint deployment and configuration. -See [appwrapper_webhook.go]({{ site.gh_main_url }}/internal/webhook/appwrapper_webhook.go) -for the implementation. - -#### Workload Controller - -The Workload Controller is an instantiation of Kueue’s JobReconciller -along with an implementation of Kueue’s GenericJob interface for -AppWrappers. As is standard practice in Kueue, this controller will -watch AppWrapper instances and their owned Workload instances to -reconcile the two. This controller makes it possible for Kueue to -suspend and resume the AppWrapper and constrain the placement of pods -created by the wrapped resources of the AppWrapper. It also translates -the status of the AppWrapper to the format expected by Kueue. - -See [workload_controller.go]({{ site.gh_main_url }}/internal/controller/workload/workload_controller.go) -for the implementation. - -To ensure smooth interoperation with all possible configurations of Kueue, -it is recommended to register AppWrappers as an -[externalFramework](https://kueue.sigs.k8s.io/docs/tasks/dev/integrate_a_custom_job/#building-an-external-integration) -with Kueue. Our script [deploy-kueue.sh]({{ site.gh_main_url }}/hack/deploy-kueue.sh) automates -this. The script accomplishes the following tasks: - -1. Adds the ability to get, list, and watch AppWrappers to the RBACs for Kueue's manager role. -2. Adds `"AppWrapper.v1beta2.workload.codeflare.dev"` to the list of `externalFrameworks` in - Kueue's manager configuration. 
- - -#### Framework Controller +#### AppWrapper Controller -The Framework Controller is a standard reconciliation loop that watches AppWrapper instances and +The AppWrapper Controller is a standard reconciliation loop that watches AppWrapper instances and is responsible for all AppWrapper-specific operations including creating, monitoring, and deleting the wrapped resources in response to the modifications of the AppWrapper instance’s specification and -status made by the Workload Controller described above. +status. ```mermaid! --- @@ -103,7 +72,7 @@ stateDiagram-v2 The state diagram above depicts the transitions between the Phases of an AppWrapper. A label on an edge indicates the state change that will trigger that transition. For example, if an AppWrapper is in the -Suspended Phase and `spec.suspend` becomes False then the Framework Controller +Suspended Phase and Kueue sets `spec.suspend` to `false` then the AppWrapper Controller will transition the AppWrapper to the Resuming Phase. These states are augmented by two orthogonal Conditions: @@ -113,16 +82,16 @@ These states are augmented by two orthogonal Conditions: QuotaReserved and ResourcesDeployed are both true in states colored blue below. QuotaReserved and ResourcesDeployed will initially be true in the Failed state (pink), -but will become false when the Framework Controller succeeds at deleting all resources created +but will become false when the AppWrapper Controller succeeds at deleting all resources created in the Resuming phase. ResourcesDeployed will be true in the Succeeded state (green), but QuotaReserved will be false. -After a configurable delay, the Framework controller will eventually delete the resources of +After a configurable delay, the AppWrapper controller will eventually delete the resources of Succeeded AppWrappers and ResourcesDeployed will become false. Any phase may transition to the Terminating phase (not shown) when the AppWrapper is deleted. 
During the Terminating phase, QuotaReserved and ResourcesDeployed may initially be true -but will become false once the Framework Controller succeeds at deleting all associated resources. +but will become false once the AppWrapper Controller succeeds at deleting all associated resources. See [appwrapper_controller.go]({{ site.gh_main_url }}/internal/controller/appwrapper/appwrapper_controller.go) for the implementation. From 4cd86a2d9c96f544eb17842e0f97728aeb909cc8 Mon Sep 17 00:00:00 2001 From: David Grove Date: Wed, 12 Mar 2025 08:56:42 -0400 Subject: [PATCH 6/7] Update to kueue v0.11.0-rc1 --- go.mod | 2 +- go.sum | 4 ++-- hack/kueue-config/kustomization.yaml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/go.mod b/go.mod index aac3a3b..1f94f12 100644 --- a/go.mod +++ b/go.mod @@ -19,7 +19,7 @@ require ( sigs.k8s.io/controller-runtime v0.20.2 sigs.k8s.io/controller-tools v0.16.5 sigs.k8s.io/jobset v0.8.0 - sigs.k8s.io/kueue v0.11.0-devel.0.20250310124347-c663d0b0faeb + sigs.k8s.io/kueue v0.11.0-rc.0 sigs.k8s.io/kustomize/kustomize/v5 v5.5.0 sigs.k8s.io/yaml v1.4.0 ) diff --git a/go.sum b/go.sum index 50961b9..056bc60 100644 --- a/go.sum +++ b/go.sum @@ -296,8 +296,8 @@ sigs.k8s.io/jobset v0.8.0 h1:80cJcPld+IMdKFOqzEW4et3Y6lGAPcP8YmBZ+aiKGYA= sigs.k8s.io/jobset v0.8.0/go.mod h1:yitjuGOExl2p964nhyevQGIkfiPSRHcdC3zNBneKCT8= sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 h1:/Rv+M11QRah1itp8VhT6HoVx1Ray9eB4DBr+K+/sCJ8= sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3/go.mod h1:18nIHnGi6636UCz6m8i4DhaJ65T6EruyzmoQqI2BVDo= -sigs.k8s.io/kueue v0.11.0-devel.0.20250310124347-c663d0b0faeb h1:DHwLXuSj1NMqOIxwHhxHiXXHS1CBkIsYeU5narw4j1A= -sigs.k8s.io/kueue v0.11.0-devel.0.20250310124347-c663d0b0faeb/go.mod h1:+2mf2ItP10M/Wa5nixARFOO+ApxArSKxLGhKtmqjaRc= +sigs.k8s.io/kueue v0.11.0-rc.0 h1:/eziIvKyvjbLU7qt0Q9iohbcwv2Ly0ZgHt/iNZzvlso= +sigs.k8s.io/kueue v0.11.0-rc.0/go.mod h1:+2mf2ItP10M/Wa5nixARFOO+ApxArSKxLGhKtmqjaRc= sigs.k8s.io/kustomize/api 
v0.18.0 h1:hTzp67k+3NEVInwz5BHyzc9rGxIauoXferXyjv5lWPo= sigs.k8s.io/kustomize/api v0.18.0/go.mod h1:f8isXnX+8b+SGLHQ6yO4JG1rdkZlvhaCf/uZbLVMb0U= sigs.k8s.io/kustomize/kustomize/v5 v5.5.0 h1:o1mtt6vpxsxDYaZKrw3BnEtc+pAjLz7UffnIvHNbvW0= diff --git a/hack/kueue-config/kustomization.yaml b/hack/kueue-config/kustomization.yaml index edb66be..c6eec93 100644 --- a/hack/kueue-config/kustomization.yaml +++ b/hack/kueue-config/kustomization.yaml @@ -2,12 +2,12 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: -- "https://github.com/kubernetes-sigs/kueue/config/default?ref=v0.10.2" +- "https://github.com/kubernetes-sigs/kueue/config/default?ref=v0.11.0-rc.0" images: - name: us-central1-docker.pkg.dev/k8s-staging-images/kueue/kueue newName: registry.k8s.io/kueue/kueue - newTag: v0.10.2 + newTag: v0.11.0-rc.0 patches: - target: From 2dc135201cd220c45b1a57b4c8a5b5976ef4cbd9 Mon Sep 17 00:00:00 2001 From: David Grove Date: Wed, 12 Mar 2025 09:45:44 -0400 Subject: [PATCH 7/7] kueue rc images only available from staging-images --- hack/kueue-config/kustomization.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hack/kueue-config/kustomization.yaml b/hack/kueue-config/kustomization.yaml index c6eec93..79f9b79 100644 --- a/hack/kueue-config/kustomization.yaml +++ b/hack/kueue-config/kustomization.yaml @@ -6,7 +6,8 @@ resources: images: - name: us-central1-docker.pkg.dev/k8s-staging-images/kueue/kueue - newName: registry.k8s.io/kueue/kueue + # TEMP -- disable rename when using an RC build + # newName: registry.k8s.io/kueue/kueue newTag: v0.11.0-rc.0 patches: